def try_params(n_instances, params, base, train, valid, test, istest):
    """Evaluate an InfoGain/Ranker attribute-selected classifier for one parameter set.

    :param n_instances: number of instances handed to the training helper
        (rounded to the nearest int)
    :param params: dict with the boolean keys ``missingMerge`` and
        ``binarizeNumericAttributes``
    :param base: wrapper of the learner to run on the selected attributes
        (its ``jobject`` is used)
    :param train: training data
    :param valid: validation data, used when ``istest`` is false
    :param test: test data, used when ``istest`` is true
    :param istest: evaluate on the test set instead of the validation set
    :return: whatever ``test_weka_classifier`` or
        ``train_and_eval_weka_classifier`` returns
    """
    n_instances = int(round(n_instances))
    pprint(params)

    # "-M" is passed when missing values are NOT merged, "-B" when numeric
    # attributes are binarized.
    options = []
    if not params['missingMerge']:
        options.append("-M")
    if params['binarizeNumericAttributes']:
        options.append("-B")

    search = ASSearch(classname="weka.attributeSelection.Ranker")
    evaluator = ASEvaluation(
        classname="weka.attributeSelection.InfoGainAttributeEval",
        options=options)

    clf = Classifier(
        classname="weka.classifiers.meta.AttributeSelectedClassifier")
    clf.set_property("evaluator", evaluator.jobject)
    clf.set_property("search", search.jobject)
    # The wrapped learner is the bean property "classifier"; there is no
    # "base" property, so the old name left the default learner in place.
    clf.set_property("classifier", base.jobject)

    if istest:
        return test_weka_classifier(clf, train, test)
    return train_and_eval_weka_classifier(clf, train, valid, n_instances)
def try_params(n_instances, params, base, train, valid, test, istest):
    """Evaluate a CFS attribute-selected classifier for one parameter set.

    :param n_instances: number of instances handed to the training helper
        (rounded to the nearest int)
    :param params: dict with the boolean keys ``missingSeparate`` and
        ``locallyPredictive`` and the key ``search``, a class name that is
        appended to ``weka.attributeSelection.``
    :param base: wrapper of the learner to run on the selected attributes
        (its ``jobject`` is used)
    :param train: training data
    :param valid: validation data, used when ``istest`` is false
    :param test: test data, used when ``istest`` is true
    :param istest: evaluate on the test set instead of the validation set
    :return: whatever ``test_weka_classifier`` or
        ``train_and_eval_weka_classifier`` returns
    """
    n_instances = int(round(n_instances))
    pprint(params)

    options = []
    if params['missingSeparate']:
        options.append("-M")
    if not params['locallyPredictive']:
        options.append("-L")

    search = ASSearch(classname="weka.attributeSelection." + params['search'])
    evaluator = ASEvaluation(
        classname="weka.attributeSelection.CfsSubsetEval", options=options)

    clf = Classifier(
        classname="weka.classifiers.meta.AttributeSelectedClassifier")
    clf.set_property("evaluator", evaluator.jobject)
    # The search was built but never attached before, so the strategy chosen
    # in params['search'] had no effect on the classifier.
    clf.set_property("search", search.jobject)
    # The wrapped learner is the bean property "classifier"; there is no
    # "base" property, so the old name left the default learner in place.
    clf.set_property("classifier", base.jobject)

    if istest:
        return test_weka_classifier(clf, train, test)
    return train_and_eval_weka_classifier(clf, train, valid, n_instances)
def Feature_Selection(infile):
    """Run CFS/BestFirst attribute selection on a CSV file and print the summary.

    The file is resolved against the current working directory. The Remove
    filter is applied with the range option " 1", and the last remaining
    attribute is used as the class. A JVM is started for the call and is
    always stopped again, even when loading or selection fails.

    :param infile: name of the CSV file, relative to the working directory
    :type infile: str
    """
    csvpath = os.getcwd() + '/' + infile
    jvm.start(packages=True, max_heap_size="4g")
    try:
        # Single-argument print calls behave the same under Python 2 and 3.
        print("\n\n")
        print("Loaded file:  " + str(infile))

        csvloader = Loader(classname="weka.core.converters.CSVLoader")
        csvdata = csvloader.load_file(csvpath)

        remover = Filter(
            classname="weka.filters.unsupervised.attribute.Remove",
            options=["-R", " 1"])
        remover.inputformat(csvdata)
        filtered_data = remover.filter(csvdata)
        filtered_data.class_is_last()

        search = ASSearch(classname="weka.attributeSelection.BestFirst",
                          options=["-D", "1", "-N", "5"])
        evaluator = ASEvaluation(
            classname="weka.attributeSelection.CfsSubsetEval",
            options=["-P", "1", "-E", "1"])
        attribs = AttributeSelection()
        attribs.search(search)
        attribs.evaluator(evaluator)
        attribs.select_attributes(filtered_data)

        print("Summary of Attribute Selection: ")
        print(attribs.results_string)
    finally:
        # Previously skipped whenever an exception was raised above.
        jvm.stop()
    return
def search(self):
    """
    Fetches the search strategy from the underlying Java object.

    :return: the search in use
    :rtype: ASSearch
    """
    jsearch = javabridge.call(
        self.jobject, "getSearch", "()Lweka/attributeSelection/ASSearch;")
    return ASSearch(jobject=jsearch)
def main():
    """
    Example run: subset selection, then a cross-validated ranking, on anneal.
    """
    # load a dataset
    data_path = helper.get_data_dir() + os.sep + "anneal.arff"
    helper.print_info("Loading dataset: " + data_path)
    arff_loader = Loader("weka.core.converters.ArffLoader")
    dataset = arff_loader.load_file(data_path)
    dataset.class_is_last()

    # perform attribute selection
    helper.print_title("Attribute selection")
    subset_search = ASSearch(
        classname="weka.attributeSelection.BestFirst",
        options=["-D", "1", "-N", "5"])
    subset_eval = ASEvaluation(
        classname="weka.attributeSelection.CfsSubsetEval",
        options=["-P", "1", "-E", "1"])
    selector = AttributeSelection()
    selector.search(subset_search)
    selector.evaluator(subset_eval)
    selector.select_attributes(dataset)
    num_selected = selector.number_attributes_selected
    print("# attributes: " + str(num_selected))
    as_array = selector.selected_attributes
    print("attributes (as numpy array): " + str(as_array))
    as_list = list(selector.selected_attributes)
    print("attributes (as list): " + str(as_list))
    print("result string:\n" + selector.results_string)

    # perform ranking
    helper.print_title("Attribute ranking (2-fold CV)")
    rank_search = ASSearch(
        classname="weka.attributeSelection.Ranker", options=["-N", "-1"])
    rank_eval = ASEvaluation("weka.attributeSelection.InfoGainAttributeEval")
    ranker = AttributeSelection()
    ranker.ranking(True)
    ranker.folds(2)
    ranker.crossvalidation(True)
    ranker.seed(42)
    ranker.search(rank_search)
    ranker.evaluator(rank_eval)
    ranker.select_attributes(dataset)
    print("ranked attributes:\n" + str(ranker.ranked_attributes))
    print("result string:\n" + ranker.results_string)
def get_search(params):
    """Build a Ranker search whose ``-N`` option is ``params['num_attr']``.

    :param params: dict providing the key ``num_attr``
    :return: the configured search
    :rtype: ASSearch
    """
    ranker_options = ["-N", str(params['num_attr'])]
    return ASSearch(classname="weka.attributeSelection.Ranker",
                    options=ranker_options)
def showAttributeRanking(self, data):
    """Rank the attributes of ``data`` by information gain and print the outcome.

    :param data: the dataset handed to ``select_attributes``
    """
    ranker = ASSearch(
        classname="weka.attributeSelection.Ranker",
        options=["-T", "-1.7976931348623157E308", "-N", "-1"])
    info_gain = ASEvaluation(
        classname="weka.attributeSelection.InfoGainAttributeEval")

    selector = AttributeSelection()
    selector.set_search(ranker)
    selector.set_evaluator(info_gain)
    selector.select_attributes(data)

    count = selector.get_number_attributes_selected()
    print("# attributes: " + str(count))
    chosen = selector.get_selected_attributes()
    print("attributes: " + str(chosen))
    report = selector.to_results_string()
    print("result string:\n" + report)
def get_search(params):
    """Build a BestFirst search from ``params``.

    :param params: dict providing ``direction`` (passed as ``-D``) and
        ``nodes`` (passed as ``-N``)
    :return: the configured search
    :rtype: ASSearch
    """
    best_first_options = [
        "-D", str(params['direction']),
        "-N", str(params['nodes']),
    ]
    return ASSearch(classname="weka.attributeSelection.BestFirst",
                    options=best_first_options)
def filter_data(self, data):
    """Reduce ``data`` with the supervised AttributeSelection filter (CFS + BestFirst).

    :param data: the dataset to filter
    :return: the output of the filter
    """
    print("Filtering Data..\n")
    selection_filter = Filter(
        classname="weka.filters.supervised.attribute.AttributeSelection")
    cfs_eval = ASEvaluation(
        classname="weka.attributeSelection.CfsSubsetEval",
        options=["-P", "1", "-E", "1"])
    best_first = ASSearch(
        classname="weka.attributeSelection.BestFirst",
        options=["-D", "1", "-N", "5"])

    # evaluator and search are attached as Java objects via bean properties
    selection_filter.set_property("evaluator", cfs_eval.jobject)
    selection_filter.set_property("search", best_first.jobject)
    selection_filter.inputformat(data)
    return selection_filter.filter(data)
def featureSelection(self):
    """Select attributes of ``self.original_data`` with GeneticSearch + CFS.

    Stores the selected attributes in ``self.selected_features``, their count
    in ``self.num_features`` and the reduced dataset in ``self.data_selected``.
    """
    genetic_options = ["-Z", "1024", "-G", "20", "-C", "0.6", "-M", "0.3"]
    genetic_search = ASSearch(
        classname="weka.attributeSelection.GeneticSearch",
        options=genetic_options)
    cfs_eval = ASEvaluation(
        classname="weka.attributeSelection.CfsSubsetEval",
        options=["-P", "1", "-E", "1"])

    selector = AttributeSelection()
    selector.search(genetic_search)
    selector.evaluator(cfs_eval)
    selector.select_attributes(self.original_data)

    self.selected_features = selector.selected_attributes
    self.num_features = selector.number_attributes_selected
    reduced = selector.reduce_dimensionality(self.original_data)
    self.data_selected = reduced
def cfs(table, cores):
    """Run CFS/BestFirst attribute selection on a CSV table.

    :param table: path of the CSV file; the last attribute is used as the class
    :type table: str
    :param cores: value passed to the evaluator's ``-P`` and ``-E`` options;
        may be an int or a str
    :return: the selected attributes as a list
    :rtype: list
    """
    loader = Loader("weka.core.converters.CSVLoader")
    anneal_data = loader.load_file(table)
    anneal_data.class_is_last()
    logger.info("Running attribute selection for: "
                + str(table.split("/")[-1]) + ". Please, wait a moment.")

    search = ASSearch(classname="weka.attributeSelection.BestFirst",
                      options=["-D", "0", "-N", "5"])
    # Weka option lists hold strings; convert so an int core count also works.
    cores_opt = str(cores)
    evaluation = ASEvaluation(
        classname="weka.attributeSelection.CfsSubsetEval",
        options=["-Z", "-P", cores_opt, "-E", cores_opt])

    attsel = AttributeSelection()
    attsel.search(search)
    attsel.evaluator(evaluation)
    attsel.select_attributes(anneal_data)
    logger.info("Selected attributes: " + str(attsel.selected_attributes))

    # TODO: deleting the instances does not work yet
    anneal_data.delete(index=None)
    return list(attsel.selected_attributes)
def use_filter(data):
    """
    Runs attribute selection through the AttributeSelection filter and prints
    the filtered dataset.

    :param data: the dataset to use
    :type data: Instances
    """
    print("\n2. Filter")
    selection_filter = Filter(
        classname="weka.filters.supervised.attribute.AttributeSelection")
    cfs_eval = ASEvaluation(classname="weka.attributeSelection.CfsSubsetEval")
    stepwise = ASSearch(
        classname="weka.attributeSelection.GreedyStepwise", options=["-B"])
    selection_filter.set_property("evaluator", cfs_eval.jobject)
    selection_filter.set_property("search", stepwise.jobject)
    selection_filter.inputformat(data)
    reduced = selection_filter.filter(data)
    print(str(reduced))
def use_low_level(data):
    """
    Drives the attribute selection API directly and prints the selected
    attribute indices.

    :param data: the dataset to use
    :type data: Instances
    """
    print("\n3. Low-level")
    selector = AttributeSelection()
    cfs_eval = ASEvaluation(classname="weka.attributeSelection.CfsSubsetEval")
    stepwise = ASSearch(
        classname="weka.attributeSelection.GreedyStepwise", options=["-B"])
    # evaluator and search are set on the Java wrapper object itself
    selector.jwrapper.setEvaluator(cfs_eval.jobject)
    selector.jwrapper.setSearch(stepwise.jobject)
    selector.select_attributes(data)
    index_list = selector.selected_attributes.tolist()
    print("selected attribute indices (starting with 0):\n" + str(index_list))
def relieff(filter_data, feature_names):
    """Select attributes with ReliefF + Ranker and return their names.

    :param filter_data: the dataset handed to ``select_attributes``
    :param feature_names: sequence indexed by the selected attribute indices
    :return: names of the selected attributes, without the last result entry
    :rtype: list
    """
    ranker = ASSearch(
        classname="weka.attributeSelection.Ranker",
        options=["-T", "-1.7976931348623157E308", "-N", "-1"])
    # the value after "-K" is the number of nearest neighbours
    relief_eval = ASEvaluation(
        classname="weka.attributeSelection.ReliefFAttributeEval",
        options=["-M", "-1", "-D", "1", "-K", "10"])

    selector = AttributeSelection()
    selector.search(ranker)
    selector.evaluator(relief_eval)
    selector.select_attributes(filter_data)

    # the wrapper appends the class column index to the result; drop it
    chosen = selector.selected_attributes[:-1]
    names = []
    for idx in chosen:
        names.append(feature_names[idx])
    return names
def use_filter(data):
    """
    Runs attribute selection through the ``wfilters.AttributeSelection``
    filter and prints the result together with the evaluator and search used.

    :param data: the dataset to use
    :type data: Instances
    """
    print("\n2. Filter")
    selection_filter = wfilters.AttributeSelection()
    cfs_eval = ASEvaluation(classname="weka.attributeSelection.CfsSubsetEval")
    stepwise = ASSearch(
        classname="weka.attributeSelection.GreedyStepwise", options=["-B"])
    selection_filter.evaluator = cfs_eval
    selection_filter.search = stepwise
    selection_filter.inputformat(data)
    reduced = selection_filter.filter(data)
    print(str(reduced))
    print("Evaluator:\n", selection_filter.evaluator)
    print("Search:\n", selection_filter.search)
def feature_selection_weka(x_train, y_train, x_test, input_path, features):
    """Restrict train and test frames to features chosen by Weka CFS.

    The selection is cached in ``selected_features_weka_<features>.csv`` under
    the ``input_path`` prefix. When that file exists its columns are reused,
    otherwise the selection is computed with Weka and the file is written.

    :param x_train: training features (pandas DataFrame)
    :param y_train: training target, stored as the ``target`` column
    :param x_test: test features (pandas DataFrame)
    :param input_path: string prefix prepended to the cache file name
    :param features: percentage of the training columns to keep
    :return: tuple of (filtered training frame, filtered test frame)
    """
    keep_count = int(x_train.shape[1] * (features / 100.0))
    if not os.path.exists('Weka'):
        os.mkdir('Weka')

    cache_file = input_path + f'selected_features_weka_{features}.csv'
    if os.path.exists(cache_file):
        chosen_columns = pd.read_csv(cache_file, index_col=0).columns
    else:
        # keep only columns where some row differs from the first row
        varying = (x_train != x_train.iloc[0]).any()
        x_train = x_train.loc[:, varying]

        # export with synthetic column names plus the target column
        export = x_train.copy()
        export.columns = [str(pos) + "a" for pos in range(export.shape[1])]
        export['target'] = y_train
        export.to_csv('Weka/train_weka_format.csv', index=False)

        from weka.attribute_selection import ASEvaluation, AttributeSelection, ASSearch
        from weka.core.converters import Loader, Saver

        csv_loader = Loader(classname="weka.core.converters.CSVLoader")
        weka_data = csv_loader.load_file('Weka/train_weka_format.csv',
                                         class_index='last')
        stepwise = ASSearch(
            classname="weka.attributeSelection.GreedyStepwise",
            options=["-C", "-R", "-N", str(keep_count)])
        cfs_eval = ASEvaluation(
            classname="weka.attributeSelection.CfsSubsetEval",
            options=["-P", "1", "-E", "1", "-L"])
        selector = AttributeSelection()
        selector.search(stepwise)
        selector.evaluator(cfs_eval)
        selector.select_attributes(weka_data)

        ranking = pd.DataFrame(selector.ranked_attributes,
                               columns=['Feature', 'Rank'])
        ranking['Feature'] = ranking['Feature'].astype(int)
        # label-based slice: both ends are inclusive
        top_positions = ranking.loc[:keep_count - 1, 'Feature']
        x_train.iloc[:, top_positions].to_csv(cache_file)
        chosen_columns = x_train.iloc[:, top_positions].columns

    train_subset = x_train.loc[:, chosen_columns]
    test_subset = x_test.loc[:, chosen_columns]
    return train_subset, test_subset
def information_gain(filter_data, feature_names):
    """Select attributes with InfoGain + Ranker and return their names.

    :param filter_data: the dataset handed to ``select_attributes``
    :param feature_names: sequence indexed by the selected attribute indices
    :return: names of the selected attributes, without the last result entry
    :rtype: list
    """
    # "-T" sets the score threshold, "-N" how many attributes are returned
    ranker = ASSearch(
        classname="weka.attributeSelection.Ranker",
        options=["-T", "-1.7976931348623157E308", "-N", "-1"])
    # the evaluator takes no options
    gain_eval = ASEvaluation(
        classname="weka.attributeSelection.InfoGainAttributeEval", options=[])

    selector = AttributeSelection()
    selector.search(ranker)
    selector.evaluator(gain_eval)
    selector.select_attributes(filter_data)

    # the wrapper appends the class column index to the result; drop it
    chosen = selector.selected_attributes[:-1]
    names = []
    for idx in chosen:
        names.append(feature_names[idx])
    return names
def get_search(params):
    """Build a GreedyStepwise search from three boolean parameters.

    A flag is added when its parameter compares equal to ``False``:
    ``conservation`` -> ``-C``, ``backward`` -> ``-B``, ``ranked`` -> ``-R``.

    :param params: dict providing ``conservation``, ``backward`` and ``ranked``
    :return: the configured search
    :rtype: ASSearch
    """
    flag_for_key = (
        ('conservation', "-C"),
        ('backward', "-B"),
        ('ranked', "-R"),
    )
    # equality comparison kept on purpose to match the existing semantics
    stepwise_options = [
        flag for key, flag in flag_for_key if params[key] == False  # noqa: E712
    ]
    return ASSearch(classname="weka.attributeSelection.GreedyStepwise",
                    options=stepwise_options)
def use_classifier(data):
    """
    Cross-validates J48 wrapped in the AttributeSelectedClassifier
    meta-classifier and prints the evaluation summary.

    :param data: the dataset to use
    :type data: Instances
    """
    print("\n1. Meta-classifier")
    meta = Classifier(
        classname="weka.classifiers.meta.AttributeSelectedClassifier")
    cfs_eval = ASEvaluation(classname="weka.attributeSelection.CfsSubsetEval")
    stepwise = ASSearch(
        classname="weka.attributeSelection.GreedyStepwise", options=["-B"])
    tree = Classifier(classname="weka.classifiers.trees.J48")

    # Nested option strings need careful quote escaping; handing over the
    # Java objects through bean properties avoids that.
    for prop, wrapper in (("classifier", tree),
                          ("evaluator", cfs_eval),
                          ("search", stepwise)):
        meta.set_property(prop, wrapper.jobject)

    cv_result = Evaluation(data)
    cv_result.crossvalidate_model(meta, data, 10, Random(1))
    print(cv_result.summary())
def all_feature(file):
    """Rank the attributes of a data file with four single-attribute evaluators.

    The same Ranker search is combined in turn with the chi-squared,
    information gain, gain ratio and symmetrical uncertainty evaluators. For
    each, the first column of ``ranked_attributes`` is taken and cast to int
    (presumably the attribute indices in rank order -- confirm against the
    wrapper documentation). A JVM is started for the call and always stopped
    again, even when loading or ranking fails.

    :param file: path of the data file; the last attribute is used as the class
    :return: tuple ``(chi, info_gain, gain_ratio, symmetric_uncertainty)``
    """
    evaluator_classes = (
        "weka.attributeSelection.ChiSquaredAttributeEval",
        "weka.attributeSelection.InfoGainAttributeEval",
        "weka.attributeSelection.GainRatioAttributeEval",
        "weka.attributeSelection.SymmetricalUncertAttributeEval",
    )

    jvm.start(packages=True)
    try:
        data = converters.load_any_file(file)
        data.class_is_last()

        search = ASSearch(
            classname="weka.attributeSelection.Ranker",
            options=["-T", "-1.7976931348623157E308", "-N", "-1"])
        attsel = AttributeSelection()
        attsel.search(search)

        rankings = []
        for classname in evaluator_classes:
            attsel.evaluator(ASEvaluation(classname=classname))
            attsel.select_attributes(data)
            rankings.append(attsel.ranked_attributes[:, 0].astype(int))
    finally:
        # Previously skipped whenever an exception was raised above.
        jvm.stop()

    chi, info_gain, gain_ratio, symmetric_uncertainty = rankings
    return chi, info_gain, gain_ratio, symmetric_uncertainty
def get_IG(ofile_dir, loader):
    """Rank the attributes of a data file by information gain.

    :param ofile_dir: path handed to ``loader.load_file``
    :param loader: object providing ``load_file`` (presumably a weka Loader --
        confirm against the caller)
    :return: tuple ``(results, mean_score)``; ``results`` maps attribute name
        to score and ``mean_score`` is the mean of those scores
    """
    data = loader.load_file(ofile_dir)
    data.class_is_last()
    evaluator = ASEvaluation(classname="weka.attributeSelection.InfoGainAttributeEval")
    search = ASSearch(classname="weka.attributeSelection.Ranker", options=["-T", "-1.7976931348623157E308", "-N", "-1"])
    attsel = AttributeSelection()
    attsel.search(search)
    attsel.evaluator(evaluator)
    attsel.select_attributes(data)
    results = {}
    if attsel.number_attributes_selected < 2:
        # Fewer than two attributes selected: read the scores from the
        # textual report instead of ranked_attributes.
        flag = 0
        output = attsel.results_string
        for i in output.split('\n'):
            if (flag != 0):
                # Only reached for lines after the "Ranked attributes" header.
                if len(i.split(' '))>2:
                    # Keep the non-empty space-separated tokens of the line.
                    t=[]
                    for f in i.split(' '):
                        if f!='':
                            t.append(f)
                    # Tokens from index 2 onward are joined into the name;
                    # token 0 is parsed as the score.
                    r_tax = ''
                    for c in range(len(t)):
                        if c>1:
                            r_tax = r_tax+t[c]+' '
                    results.update({str(r_tax.strip()): float(t[0].strip())})
                else:
                    # First line without enough pieces ends the ranking table.
                    break
            if "Ranked attributes" in i:
                flag = 1
        # NOTE(review): raises ZeroDivisionError when no line was parsed.
        mean_score = sum(results.values())/len(results.values())
        # Removes the input path through the shell.
        # NOTE(review): the path is interpolated unquoted into the command.
        os.system("rm -r "+ofile_dir)
    else:
        # Each ranked_attributes row is used as (attribute index, score).
        results = dict([(str(data.attribute(attr[0]).name), attr[1]) for attr in attsel.ranked_attributes])
        mean_score = attsel.ranked_attributes[:,1].mean()
    return results, mean_score
def try_params(n_instances, params, base, train, valid, test, istest):
    """Evaluate a ReliefF/Ranker attribute-selected classifier for one parameter set.

    :param n_instances: number of instances handed to the training helper
        (rounded to the nearest int)
    :param params: dict with the keys ``weightByDistance`` (bool, adds
        ``-W``), ``sampleSize`` (``-M``), ``numNeighbours`` (``-K``) and
        ``sigma`` (``-A``)
    :param base: wrapper of the learner to run on the selected attributes
        (its ``jobject`` is used)
    :param train: training data
    :param valid: validation data, used when ``istest`` is false
    :param test: test data, used when ``istest`` is true
    :param istest: evaluate on the test set instead of the validation set
    :return: whatever ``test_weka_classifier`` or
        ``train_and_eval_weka_classifier`` returns
    """
    n_instances = int(round(n_instances))
    pprint(params)

    options = []
    if params['weightByDistance']:
        options.append("-W")
    options += [
        "-M", str(params['sampleSize']),
        "-K", str(params['numNeighbours']),
        "-A", str(params['sigma']),
    ]

    search = ASSearch(classname="weka.attributeSelection.Ranker")
    evaluator = ASEvaluation(
        classname="weka.attributeSelection.ReliefFAttributeEval",
        options=options)

    clf = Classifier(
        classname="weka.classifiers.meta.AttributeSelectedClassifier")
    clf.set_property("evaluator", evaluator.jobject)
    clf.set_property("search", search.jobject)
    # The wrapped learner is the bean property "classifier"; there is no
    # "base" property, so the old name left the default learner in place.
    clf.set_property("classifier", base.jobject)

    if istest:
        return test_weka_classifier(clf, train, test)
    return train_and_eval_weka_classifier(clf, train, valid, n_instances)
def select_attribute(file):
    """Rank the attributes of an ARFF file by correlation and record the top ones.

    The top 2, 5, 10 and 50 selected attributes are appended to the
    module-level lists ``Field2``, ``Field5``, ``Field10`` and ``Field50``,
    which are then de-duplicated. Files whose name does not end in ``.arff``
    are reported and skipped. The selection results are also written next to
    the input file as ``<name>_attsel_results.txt``.

    :param file: path of the ARFF file (a pathlib path object)
    """
    global Field50
    global Field10
    global Field5
    global Field2

    filename = file.parts[-1]  # Get filename from Pathlib object
    out_dir = file.parents[0]  # Data directory currently in
    print("Selecting attributes from %s" % filename)
    if not filename.endswith(".arff"):
        print("%s not ARFF file." % filename)
        return
    filename_base = filename[:-5]  # Removes '.arff' from filename

    data = load_Arff_file(file)  # Load data from arff
    data.class_is_first()  # Set first attr as class

    # Define Attribute selection
    search = ASSearch(classname="weka.attributeSelection.Ranker",
                      options=["-T", "0.01", "-N", "-1"])
    # Define Attribute Evaluator
    evaluator = ASEvaluation(
        classname="weka.attributeSelection.CorrelationAttributeEval",
        options=[])

    # Run attribution selection
    attsel = AttributeSelection()
    attsel.search(search)
    attsel.evaluator(evaluator)
    attsel.select_attributes(data)

    # Define filepath and output results
    attsel_output = filename_base + "_attsel_results.txt"
    output_select_attribute(attsel, out_dir / attsel_output)

    # Debug Analysis
    selected = attsel.selected_attributes
    print(selected)
    # Slices instead of fixed-count indexing: the 0.01 threshold can leave
    # fewer than 50 attributes, which used to raise IndexError.
    Field2.extend(selected[:2])
    Field5.extend(selected[:5])
    Field10.extend(selected[:10])
    Field50.extend(selected[:50])
    print(Field2)
    print(Field5)
    print(Field10)
    print(Field50)

    if len(set(Field10)) == len(Field10):
        print("no duplicates found")
    else:
        print("duplicate found")

    # Drop duplicates accumulated across calls (element order is not kept).
    Field50 = list(set(Field50))
    Field10 = list(set(Field10))
    Field5 = list(set(Field5))
    Field2 = list(set(Field2))
def createTrainedModel():
    """Train, serialize and cross-validate the gender and age NaiveBayes models.

    Reads ``genderTrain.arff`` and ``ageTrain.arff`` from the module-level
    ``outputModel`` directory, with the last attribute as the class. For each
    it builds a NaiveBayes classifier, writes it with a template of the
    dataset to ``GenderModel.model`` / ``AgeModel.model`` in the same
    directory, runs a 10-fold cross-validation with seed 1 and prints the
    model, summary and per-class details. The process is terminated with
    ``os._exit(0)`` at the end, so this function does not return.
    """
    from weka.core.converters import Loader
    from weka.classifiers import Classifier
    from weka.classifiers import Evaluation
    from weka.attribute_selection import ASSearch, ASEvaluation, AttributeSelection
    from weka.core.classes import Random
    from weka.core.dataset import Instances
    import weka.core.serialization as serialization

    # Ranker options need their flag names; the bare values given before
    # matched no option.
    ranker_options = ["-T", "-1.7976931348623157E308", "-N", "-1"]

    # ---- gender model ----
    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file(os.path.join(outputModel, "genderTrain.arff"))
    data.class_is_last()

    classi = "weka.classifiers.bayes.NaiveBayes"
    cls = Classifier(classname=classi)

    # Attribute selection (ChiSquaredAttributeEval with this Ranker) is
    # currently disabled; the search object is created but not used.
    search = ASSearch(classname="weka.attributeSelection.Ranker",
                      options=ranker_options)

    cls.build_classifier(data)
    serialization.write_all(
        os.path.join(outputModel, "GenderModel" + ".model"),
        [cls, Instances.template_instances(data)])

    evl = Evaluation(data)
    evl.crossvalidate_model(cls, data, 10, Random(1))
    # Single-argument print calls behave the same under Python 2 and 3.
    print("Gender model predictions")
    print(cls)
    print(evl.summary())
    print(evl.class_details())

    # ---- age model ----
    data = loader.load_file(os.path.join(outputModel, "ageTrain.arff"))
    data.class_is_last()

    # Alternatives tried earlier: trees.J48, functions.Logistic,
    # trees.RandomForest, functions.SMOreg.
    classi = "weka.classifiers.bayes.NaiveBayes"
    cls = Classifier(classname=classi)

    # As above: created for the disabled attribute selection step.
    search = ASSearch(classname="weka.attributeSelection.Ranker",
                      options=ranker_options)

    cls.build_classifier(data)
    print("Age model predictions")
    print(cls)
    serialization.write_all(
        os.path.join(outputModel, "AgeModel" + ".model"),
        [cls, Instances.template_instances(data)])

    evl = Evaluation(data)
    evl.crossvalidate_model(cls, data, 10, Random(1))
    print(evl.summary())
    print(evl.class_details())

    # Hard exit without normal interpreter shutdown.
    os._exit(0)
# Check that the required Weka package is installed. After a fresh install
# the script stops the JVM and exits so it can be restarted.
chisq_installed = any(p.name == chisq_name for p in pkg.installed_packages())
if not chisq_installed:
    pkg.install_package(chisq_name)
    print("pkg %s installed, please restart" % chisq_name)
    jvm.stop()
    sys.exit(1)

data_dir = "\\\\egr-1l11qd2\\CLS_lab\\Junya Zhao\\GWAS SNPs_2018\\random50_combo_Nonoverlap_\\"
globbed_files = glob.glob(data_dir + "*.csv")

# Run a 10-fold cross-validated CFS/BestFirst selection on every CSV file.
for csv in globbed_files:
    data = converters.load_any_file(csv)
    data.class_is_last()

    search = ASSearch(classname="weka.attributeSelection.BestFirst",
                      options=["-D", "1", "-N", "10"])
    # "-E" needs its leading dash; the bare "E" given before is not an
    # option name.
    evaluator = ASEvaluation(
        classname="weka.attributeSelection.CfsSubsetEval",
        options=["-P", "1", "-E", "1"])

    attsel = AttributeSelection()
    attsel.folds(10)
    attsel.crossvalidation(True)
    attsel.seed(1)
    attsel.search(search)
    attsel.evaluator(evaluator)
    attsel.select_attributes(data)

    evl = Evaluation(data)
    print("# attributes: " + str(attsel.number_attributes_selected))
    print("attributes: " + str(attsel.selected_attributes))
    print("result string:\n" + attsel.results_string)
    print(evl)
    # write the report for each file
# Check that the required Weka package is installed. Unlike a restart-based
# flow, this script only prints a notice after installing and carries on.
chisq_installed = any(p.name == chisq_name for p in pkg.installed_packages())
if not chisq_installed:
    pkg.install_package(chisq_name)
    print("pkg %s installed, please restart" % chisq_name)
    # jvm.stop()
    # sys.exit(1)

# The literal used the invalid escape "\D"; it is now spelled with valid
# escapes and the same runtime value.
# NOTE(review): that value has a doubled separator after "Junya Zhao" --
# confirm whether a single backslash was intended.
data_dir = "\\\\egr-1l11qd2\\CLS_lab\\Junya Zhao\\\\Data driven model _paper [June 25_2018\\NonOverlap_featureSelection\\mRMRReport\\"
globbed_files = glob.glob(data_dir + "*.csv")

# Run a 10-fold cross-validated CFS/RerankingSearch selection on every CSV.
for csv in globbed_files:
    data = converters.load_any_file(csv)
    data.class_is_last()

    search = ASSearch(
        classname="weka.attributeSelection.RerankingSearch",
        options=["-method", "2", "-blockSize", "50", "-rankingMeasure", "0"])
    # "-E" needs its leading dash; the bare "E" given before is not an
    # option name.
    evaluator = ASEvaluation(
        classname="weka.attributeSelection.CfsSubsetEval",
        options=["-P", "1", "-E", "1"])

    attsel = AttributeSelection()
    attsel.folds(10)
    attsel.crossvalidation(True)
    attsel.seed(1)
    attsel.search(search)
    attsel.evaluator(evaluator)
    attsel.select_attributes(data)

    evl = Evaluation(data)
    print("# attributes: " + str(attsel.number_attributes_selected))
    print("attributes: " + str(attsel.selected_attributes))
    print("result string:\n" + attsel.results_string)
    print(evl)
def main():
    """Password-gated run of the fraud classification experiments.

    Starts the JVM, asks for the password (three attempts), loads
    ``dataSources/fraud.arff``, prints the class counts before and after
    ``preprocess`` and then calls ``classify`` twice for each learner.
    All learners except the Hoeffding tree are run inside an
    AttributeSelectedClassifier (InfoGain + Ranker with ``-N 4``).
    """
    jvm.start(packages=True, max_heap_size="4g")
    print(
        "Hi! This is a protected command, please insert the password to proceed!"
    )

    # three attempts; the last failure prints a different message and aborts
    attempts_left = 3
    while True:
        if input('').strip() == 'DMMLproject':
            print("All good!")
            break
        attempts_left -= 1
        if attempts_left == 0:
            print(
                "This command is protected and can be used only by an administrator, please use another command."
            )
            return
        print("Wrong password, please provide the correct password")

    arff_loader = Loader(classname="weka.core.converters.ArffLoader")
    data = arff_loader.load_file("dataSources/fraud.arff")

    print("Before Preprocessing: \n")
    counts = data.attribute_stats(22).nominal_counts
    print("#instances(Class 0): ", counts[0])
    print("#instances(Class 1): ", counts[1])

    preProcessedData = preprocess(data)

    print("After Preprocessing: \n")
    counts = preProcessedData.attribute_stats(
        preProcessedData.class_index).nominal_counts
    print("#instances(Class 0): ", counts[0])
    print("#instances(Class 1): ", counts[1])

    # setup classifier with attribute selection
    wrapped = Classifier(
        classname="weka.classifiers.meta.AttributeSelectedClassifier")
    info_gain = ASEvaluation(
        classname="weka.attributeSelection.InfoGainAttributeEval")
    ranker = ASSearch(classname="weka.attributeSelection.Ranker",
                      options=["-N", "4"])
    wrapped.set_property("evaluator", info_gain.jobject)
    wrapped.set_property("search", ranker.jobject)

    # all learners are created up front, before any experiment runs
    naive_bayes = Classifier(classname="weka.classifiers.bayes.NaiveBayes")
    random_forest = Classifier(
        classname="weka.classifiers.trees.RandomForest",
        options=[
            "-P", "70", "-I", "30", "-num-slots", "1", "-K", "0", "-M",
            "1.0", "-S", "1", "-depth", "50"
        ])
    pruned_j48 = Classifier(classname="weka.classifiers.trees.J48",
                            options=["-C", "0.25", "-M", "2"])
    unpruned_j48 = Classifier(classname="weka.classifiers.trees.J48",
                              options=["-U", "-M", "2"])
    hoeffding = Classifier(
        classname="weka.classifiers.trees.HoeffdingTree",
        options=[
            "-L", "2", "-S", "1", "-E", "1.0E7", "-H", "0.05", "-M", "0.01",
            "-G", "200.0", "-N", "0.0"
        ])
    knn = Classifier(classname="weka.classifiers.lazy.IBk",
                     options=['-K', '1', '-W', '0'])
    bayes_net = Classifier(classname="weka.classifiers.bayes.BayesNet")

    # (banner, learner, run inside the attribute-selected wrapper?,
    #  [(third classify argument, model path), ...])
    # The third argument presumably switches between cross-validation (True)
    # and a train/test split (False) -- confirm against classify().
    experiments = [
        ("----------NaiveBayes----------", naive_bayes, True,
         [(True, 'models/naiveBayes.model'),
          (False, 'models/naiveBayes.model')]),
        ("----------RandomForest----------", random_forest, True,
         [(True, 'models/randomForest.model'),
          (False, 'models/randomForest.model')]),
        ("----------DecisionTree----------", pruned_j48, True,
         [(True, 'models/prunedJ48.model'),
          (False, 'models/prunedJ48.model')]),
        ("----------DecisionTreeUnpruned----------", unpruned_j48, True,
         [(True, 'models/unprunedJ48.model'),
          (False, 'models/unprunedJ48.model')]),
        # the Hoeffding tree is passed to classify() directly, unwrapped
        ("----------HoeffdingTree----------", hoeffding, False,
         [(True, 'models/HoeffdingTree.model'),
          (False, 'models/HoeffdingTree.model')]),
        # NOTE(review): the second KNN run uses the J48 model path and the
        # runs are in the opposite order to the other learners -- confirm.
        ("----------KNN----------", knn, True,
         [(False, 'models/knn.model'),
          (True, 'models/preProcessedJ48.model')]),
        ("----------BayesianBelief----------", bayes_net, True,
         [(True, 'models/bayesianBelief.model'),
          (False, 'models/bayesianBelief.model')]),
    ]

    for banner, learner, use_wrapper, runs in experiments:
        print(banner)
        if use_wrapper:
            wrapped.set_property("classifier", learner.jobject)
            model = wrapped
        else:
            model = learner
        for mode_flag, model_path in runs:
            classify(preProcessedData,
                     model,
                     mode_flag,
                     model_path,
                     splitPerc=70,
                     randomSeed=10)
from weka.core.converters import Loader loader = Loader(classname="weka.core.converters.CSVLoader") dataTrain = loader.load_file(path + '/' + str(Window[window]) + 'd_FOLDS_train_' + str(fold) + '.csv') dataTest = loader.load_file(path + '/' + str(Window[window]) + 'd_FOLDS_test_' + str(fold) + '.csv') dataTrain.class_is_last() dataTest.class_is_last() from weka.attribute_selection import AttributeSelection, ASEvaluation, ASSearch search = ASSearch( classname="weka.attributeSelection.RerankingSearch" ) #,options=["-method", "2"]) evaluator = ASEvaluation( classname='weka.attributeSelection.ClassifierAttributeEval', options=['-B', 'weka.classifiers.bayes.NaiveBayes']) Eval = AttributeSelection( classname='weka.attributeSelection.ClassifierAttributeEval', options=[ '-B', 'weka.classifiers.bayes.NaiveBayes', '--', "-S 'weka.attributeSelection.RerankingSearch -method 2'" ]) from weka.filters import Filter NominalToBinary = Filter(
(["-F", "10", "-T", "-1", "-B", "weka.classifiers.trees.J48"], ["-D", "0"]), (["-F", "10", "-T", "-1", "-B", "weka.classifiers.trees.J48"], ["-D", "0", "-N", "1"]), (["-F", "10", "-T", "-1", "-B", "weka.classifiers.trees.J48"], ["-D", "1", "-N", "2"]), (["-F", "10", "-T", "-1", "-B", "weka.classifiers.trees.J48"], ["-D", "2", "-N", "2"]), ) # attribute selection for setup in setups: evl, search = setup aseval = ASEvaluation( classname="weka.attributeSelection.WrapperSubsetEval", options=evl) assearch = ASSearch(classname="weka.attributeSelection.BestFirst", options=search) print("\n--> Attribute selection\n") print(aseval.to_commandline()) print(assearch.to_commandline()) attsel = AttributeSelection() attsel.evaluator(aseval) attsel.search(assearch) attsel.select_attributes(data) print(attsel.results_string) # cross-validation aseval = ASEvaluation(classname="weka.attributeSelection.WrapperSubsetEval", options=["-F", "10", "-B", "weka.classifiers.trees.J48"]) assearch = ASSearch(classname="weka.attributeSelection.BestFirst", options=["-D", "0", "-N", "5"]) print("\n--> Attribute selection (cross-validation)\n")
if p.name == chisq_name: chisq_installed = True if not chisq_installed: pkg.install_package(chisq_name) print("pkg %s installed, please restart" % chisq_name) jvm.stop() sys.exit(1) """ data_dir = "\\\\egr-1l11qd2\\CLS_lab\\Junya Zhao\\Data driven model _paper [June 25_2018\\FeatureSelection\\EvlSearch\\" globbed_files = glob.glob(data_dir + "*.csv") for csv in globbed_files: data = converters.load_any_file(csv) data.class_is_last() search = ASSearch(classname="weka.attributeSelection.EvolutionarySearch", options=[ "-population-size", "200", "-generations", "500", "-crossover-probability", "0.6" ]) evaluator = ASEvaluation(classname="weka.attributeSelection.CfsSubsetEval", options=["-P", "1", "E", "1"]) attsel = AttributeSelection() attsel.folds(10) attsel.crossvalidation(True) attsel.seed(1) attsel.search(search) attsel.evaluator(evaluator) attsel.select_attributes(data) evl = Evaluation(data) print("# attributes: " + str(attsel.number_attributes_selected)) print("attributes: " + str(attsel.selected_attributes)) print("result string:\n" + attsel.results_string)
( ["-F", "10", "-T", "-1", "-B", "weka.classifiers.trees.J48"], ["-D", "1", "-N", "2"] ), ( ["-F", "10", "-T", "-1", "-B", "weka.classifiers.trees.J48"], ["-D", "2", "-N", "2"] ), ) # attribute selection for setup in setups: evl, search = setup aseval = ASEvaluation(classname="weka.attributeSelection.WrapperSubsetEval", options=evl) assearch = ASSearch(classname="weka.attributeSelection.BestFirst", options=search) print("\n--> Attribute selection\n") print(aseval.to_commandline()) print(assearch.to_commandline()) attsel = AttributeSelection() attsel.evaluator(aseval) attsel.search(assearch) attsel.select_attributes(data) print(attsel.results_string) # cross-validation aseval = ASEvaluation(classname="weka.attributeSelection.WrapperSubsetEval", options=["-F", "10", "-B", "weka.classifiers.trees.J48"]) assearch = ASSearch(classname="weka.attributeSelection.BestFirst", options=["-D", "0", "-N", "5"]) print("\n--> Attribute selection (cross-validation)\n")