def logit_PC(df_train, df_test, attr_label):
    '''
    Fit a Weka logistic regression using only the PC members of the label.

    The PC members are whatever ``RF.learnPC_R`` returns for the training
    frame and label; they are used as the only predictor columns.

    :param df_train: training data, pandas data frame
    :param df_test: testing data, pandas data frame
    :param attr_label: label attribute, string
    :return: tuple ``(pcs, model, auc)``; ``model`` and ``auc`` are ``None``
        when no PC members were found
    '''
    pcs = RF.learnPC_R(df_train, attr_label)
    if not pcs:
        # Nothing to regress on.
        return pcs, None, None

    # scikit-learn alternative kept for reference:
    # model = LogisticRegression().fit(df_train[pcs], df_train[attr_label])
    # pred = model.predict_proba(df_test[pcs])
    # pred = [x[1] for x in pred]
    # auc = evaluate_auc(df_test[attr_label].values.tolist(), pred)

    columns = pcs + [attr_label]

    train_converter = DF2Instances(df_train[columns], 'train', attr_label)
    data_train = train_converter.df_to_instances()
    data_train.class_is_last()  # the label is the last column
    model = Classifier(classname="weka.classifiers.functions.Logistic")
    model.build_classifier(data_train)

    test_converter = DF2Instances(df_test[columns], 'test', attr_label)
    data_test = test_converter.df_to_instances()
    data_test.class_is_last()  # the label is the last column

    # Score of each test instance = distribution entry at index 1.
    preds = [model.distribution_for_instance(inst)[1] for inst in data_test]
    auc = evaluate_auc(df_test[attr_label].values.tolist(), preds)
    return pcs, model, auc
def train_and_predict_instances(self, trainingFile, classifier):
    """
    Train a Weka classifier on an ARFF file and predict those same instances.

    :param trainingFile: path of the ARFF file; its last attribute is the class
    :param classifier: Weka class name of the classifier to build
    :return: list ``[predictions, guess, head, realLabels]`` where
        ``predictions`` holds one class distribution per instance, ``guess``
        holds 1.0/0.0 for a correct/incorrect prediction, ``head`` holds the
        column titles and ``realLabels`` the actual class label per instance
    """
    arff_loader = Loader(classname="weka.core.converters.ArffLoader")
    data = arff_loader.load_file(trainingFile)
    data.class_is_last()

    classes = [str(code) for code in data.class_attribute.values]
    head = [className + " probability" for className in classes] + ["Guess"]

    cls = Classifier(classname=classifier)
    cls.build_classifier(data)

    predictions = []
    realLabels = []
    guess = []
    for position, inst in enumerate(data, start=1):
        pred = cls.classify_instance(inst)
        guess.append(1.0 if inst.get_value(inst.class_index) == pred else 0.0)
        dist = cls.distribution_for_instance(inst)
        predictions.append([p for p in dist])
        realLabels.append(classes[int(inst.get_value(inst.class_index))])
        print(
            str(position) + ": label index=" + str(pred) +
            ", class distribution=" + str(dist))
    return [predictions, guess, head, realLabels]
def TestClassification(arff, modelInput, results):
    """
    Classify an ARFF test set with a serialized Weka model and write a report.

    The report is a tab-separated file with one row per instance: running
    number, original class, predicted class, and the first two entries of the
    class distribution (written under the benign / malignant probability
    columns of the header). Each row is also echoed to stdout.

    :param arff: path of the ARFF file to classify; its FIRST attribute is
        used as the class
    :param modelInput: path of the serialized Weka model
    :param results: path of the report file to (over)write
    """
    # Start the Java virtual machine; it is always stopped again, even when
    # loading or classification fails.
    jvm.start()
    try:
        # Load the trained model.
        objects = serialization.read_all(modelInput)
        clsf = Classifier(jobject=objects[0])
        print(clsf)

        # Load the test set.
        loader = Loader(classname="weka.core.converters.ArffLoader")
        test = loader.load_file(arff)
        test.class_is_first()

        # Write the results; the context manager closes the file on error too.
        header = "序号\t原判断\t预测\t良性概率\t恶性概率"
        with open(results, "w") as resultsFile:
            resultsFile.write(header + "\n")
            print(header)
            for index, inst in enumerate(test):
                pred = clsf.classify_instance(inst)
                probabilities = clsf.distribution_for_instance(inst).tolist()
                sampleID = index + 1
                origin = inst.get_string_value(inst.class_index)
                prediction = inst.class_attribute.value(int(pred))
                NRate = probabilities[0]
                PRate = probabilities[1]
                row = "%d\t%s\t%s\t%s\t%s" % (sampleID, origin, prediction,
                                              str(NRate), str(PRate))
                resultsFile.write(row + "\n")
                print(row)
    finally:
        # Shut the Java virtual machine down.
        jvm.stop()
    print("检测完成")
def PredecirUnaTemporada(path):
    """
    Predict the season for the input at ``path`` with the stored Weka model.

    A one-instance ARFF file is assembled from the stored header plus the row
    produced by ``CrearInstanciaParaPredecir(path)`` and classified with the
    serialized model.

    :param path: input handed to ``CrearInstanciaParaPredecir``
    :return: one of ``"invierno"``, ``"verano"``, ``"otono"``,
        ``"primavera"``; ``""`` if the generated file holds no instance
    """
    jvm.start()
    try:
        insta = CrearInstanciaParaPredecir(path)

        # Context managers make sure both files are closed even on error.
        with open('ModelData/wekaHeader.arff', 'r') as header_file:
            atributos = header_file.readlines()
        with open('ModelData/predictionFiles/inst.arff', 'w') as inst_file:
            inst_file.writelines(atributos)
            inst_file.write("\n" + insta + '\n')

        objects = serialization.read_all(
            "ModelData/77PercentModelPaisajes.model")
        classifier = Classifier(jobject=objects[0])

        loader = Loader()
        data = loader.load_file("ModelData/predictionFiles/inst.arff")
        data.class_is_last()

        # Label order must match the class attribute of the stored header.
        clases = ["invierno", "verano", "otono", "primavera"]
        prediccion = ""
        for inst in data:
            pred = classifier.classify_instance(inst)
            prediccion = clases[int(pred)]
    finally:
        # Always release the JVM, also when loading or classifying fails.
        jvm.stop()
    return prediccion
def main(args):
    """
    Train J48 on a training set and print per-instance predictions for a
    test set: actual class, predicted class, error flag and distribution.
    The class attribute is assumed to be the last attribute of both sets.

    :param args: the commandline arguments (train and test datasets)
    :type args: list
    """
    train_path = args[1]
    test_path = args[2]
    loader = Loader(classname="weka.core.converters.ArffLoader")

    # load the datasets
    helper.print_info("Loading train: " + train_path)
    train = loader.load_file(train_path)
    train.class_index = train.num_attributes - 1
    helper.print_info("Loading test: " + test_path)
    test = loader.load_file(test_path)
    test.class_is_last()

    # build the classifier
    cls = Classifier(classname="weka.classifiers.trees.J48")
    cls.build_classifier(train)

    # output predictions
    print("# - actual - predicted - error - distribution")
    for number, inst in enumerate(test, start=1):
        pred = cls.classify_instance(inst)
        dist = cls.distribution_for_instance(inst)
        actual = inst.get_string_value(inst.class_index)
        predicted = inst.class_attribute.value(int(pred))
        if pred != inst.get_value(inst.class_index):
            error = "yes"
        else:
            error = "no"
        print("%d - %s - %s - %s - %s" %
              (number, actual, predicted, error, str(dist.tolist())))
def main(args):
    """
    Build a J48 tree from the training data and report, for every test
    instance, the actual class, the predicted class, whether they differ and
    the class distribution. The last attribute is taken as the class.

    :param args: the commandline arguments (train and test datasets)
    :type args: list
    """
    row_format = "%d - %s - %s - %s - %s"

    # load the training data
    helper.print_info("Loading train: " + args[1])
    loader = Loader(classname="weka.core.converters.ArffLoader")
    train = loader.load_file(args[1])
    train.class_index = train.num_attributes - 1

    # load the test data
    helper.print_info("Loading test: " + args[2])
    test = loader.load_file(args[2])
    test.class_is_last()

    # train the classifier
    cls = Classifier(classname="weka.classifiers.trees.J48")
    cls.build_classifier(train)

    # print one line per test instance
    print("# - actual - predicted - error - distribution")
    for index, inst in enumerate(test):
        pred = cls.classify_instance(inst)
        dist = cls.distribution_for_instance(inst)
        actual_label = inst.get_string_value(inst.class_index)
        predicted_label = inst.class_attribute.value(int(pred))
        mismatch = pred != inst.get_value(inst.class_index)
        print(row_format % (index + 1, actual_label, predicted_label,
                            "yes" if mismatch else "no",
                            str(dist.tolist())))
class python_weka(object):
    """
    Train and query a Weka Bagging classifier through temporary ARFF files.

    ``labels`` is a mapping from attribute name to ARFF type string; it is
    iterated in its own order when the ARFF header is written.
    """

    def __init__(self, input_x, input_y, labels):
        self.input_x = input_x
        self.input_y = input_y
        self.labels = labels

    def write_arff(self, filename, relation, train_or_predict, input_x,
                   input_y=None):
        """
        Write ``input_x`` (and a class column) to ``filename`` in ARFF form.

        :param filename: path of the file to (over)write
        :param relation: name put on the ``@relation`` line
        :param train_or_predict: start value of the attribute counter (both
            callers in this class pass 0)
        :param input_x: sequence of rows, each an iterable of values
        :param input_y: per-row class values; only read when the counter is
            still 0 after the header loop
        """
        # The context manager closes the file even when a write fails.
        with open(filename, "w") as f:
            f.write("@relation " + relation + "\n")
            # NOTE(review): the counter is bumped once per label and the loop
            # stops when it reaches len(labels), so with a start value of 0
            # the LAST label is never declared, and afterwards the counter is
            # non-zero whenever labels is non-empty. As a result the class
            # column below is always written as 0 and ``input_y`` is ignored.
            # Kept as-is because callers may rely on it; confirm the intent.
            for i in self.labels:
                train_or_predict += 1
                if train_or_predict == len(self.labels):
                    break
                f.write("@attribute " + i + " " + self.labels[i] + "\n")
            f.write("\n")
            f.write("@data" + "\n")
            for row_index, row in enumerate(input_x):
                for value in row:
                    f.write(str(value) + " ")
                if train_or_predict == 0:
                    f.write(str(input_y[row_index]))
                else:
                    f.write(str(0))
                f.write("\n")

    def train(self):
        """Build the Bagging classifier from ``input_x`` / ``input_y``."""
        filename = "train.arff"
        self.write_arff(filename, "train", 0, self.input_x, self.input_y)
        try:
            loader = Loader(classname="weka.core.converters.ArffLoader")
            data = loader.load_file(filename)
            data.class_is_last()
            self.cls = Classifier(classname="weka.classifiers.meta.Bagging",
                                  options=["-S", "5"])
            self.cls.build_classifier(data)
        finally:
            # Do not leave the temporary file behind when Weka raises.
            os.remove(filename)

    def predict(self, test_data):
        """
        Return, for each row of ``test_data``, the first entry of the class
        distribution produced by the trained classifier.

        :param test_data: sequence of rows, each an iterable of values
        :return: list with one value per row
        """
        filename = "test.arff"
        self.write_arff(filename, "test", 0, test_data)
        try:
            loader = Loader(classname="weka.core.converters.ArffLoader")
            data = loader.load_file(filename)
            data.class_is_last()
            result = [self.cls.distribution_for_instance(inst)[0]
                      for inst in data]
        finally:
            # Do not leave the temporary file behind when Weka raises.
            os.remove(filename)
        return result
def predictWithWeka(csvFilenameWithInputToPredict, modelFilename):
    """
    Classify the instances of a CSV file with a serialized Weka model.

    Note: to use this without knowing the class, a dummy class can be put in
    the file and the ``actual`` and ``error`` values of the result ignored.

    Note: the CSV file must contain instances of both classes (spam and
    clean).

    :param csvFilenameWithInputToPredict: name of the CSV file holding the
        instances to predict
    :param modelFilename: name of the model file generated by Weka and
        compatible with the input CSV file
    :return: list of dictionaries with the keys ``index``, ``actual``,
        ``predicted``, ``error`` and ``distribution``
    """
    loader = Loader(classname="weka.core.converters.CSVLoader")
    cls = Classifier(jobject=serialization.read(modelFilename))
    data = loader.load_file(csvFilenameWithInputToPredict)
    data.class_is_last()

    # Preprocessing chain: drop the first attribute, turn attributes 8 and 11
    # into nominal ones, then normalize.
    multi = MultiFilter()
    remove = Filter(classname="weka.filters.unsupervised.attribute.Remove",
                    options=["-R", "first"])
    numericToNom = Filter(
        classname="weka.filters.unsupervised.attribute.NumericToNominal",
        options=["-R", "8,11"])
    normalize = Filter(
        classname="weka.filters.unsupervised.attribute.Normalize",
        options=["-S", "1.0", "-T", "0.0"])
    multi.filters = [remove, numericToNom, normalize]
    multi.inputformat(data)
    test = multi.filter(data)

    results = []
    for position, inst in enumerate(test, start=1):
        pred = cls.classify_instance(inst)
        dist = cls.distribution_for_instance(inst)
        results.append({
            "index": position,
            "actual": inst.get_string_value(inst.class_index),
            "predicted": inst.class_attribute.value(int(pred)),
            "error": "yes" if pred != inst.get_value(inst.class_index)
            else "no",
            "distribution": str(dist.tolist()),
        })
    return results
def LMT(self):
    """
    Train a Weka LMT on ``self.data_train``, print the model and score
    ``self.data_test``.

    :return: AUC computed by ``evaluate_auc`` from the test labels and the
        distribution entry at index 1 of each test instance
    """
    model = Classifier(classname="weka.classifiers.trees.LMT")
    model.build_classifier(self.data_train)
    print(model)
    preds = [model.distribution_for_instance(inst)[1]
             for inst in self.data_test]
    actual = self.df_test[self.attr_label].values.tolist()
    return evaluate_auc(actual, preds)
def predict(attributes):
    """
    Classify one set of attributes with the stored model.

    ``print_to_file(attributes)`` supplies the ARFF file that is loaded; the
    prediction of its first instance is returned.

    :param attributes: values handed to ``print_to_file``
    :return: predicted class index as ``int``; ``None`` when the file holds
        no instance
    """
    jvm.start()
    arff_path = print_to_file(attributes)

    # load the saved model
    stored = serialization.read_all(
        "/Users/hosyvietanh/Desktop/data_mining/trained_model.model")
    classifier = Classifier(jobject=stored[0])

    arff_loader = Loader(classname="weka.core.converters.ArffLoader")
    data = arff_loader.load_file(arff_path)
    data.class_is_last()

    for inst in data:
        label = classifier.classify_instance(inst)
        classifier.distribution_for_instance(inst)
        return int(label)
    # NOTE(review): only reached when the file holds no instance, so the JVM
    # normally stays up after a prediction. Confirm whether that is intended
    # before moving this call: a stopped JVM may not be restartable in the
    # same process, which would break repeated calls.
    jvm.stop()
def DT(self):
    """
    Train a Weka J48 decision tree on ``self.data_train`` and score
    ``self.data_test``.

    :return: AUC computed by ``evaluate_auc`` from the test labels and the
        distribution entry at index 1 of each test instance
    """
    model = Classifier(classname="weka.classifiers.trees.J48")
    model.build_classifier(self.data_train)
    preds = [model.distribution_for_instance(inst)[1]
             for inst in self.data_test]
    actual = self.df_test[self.attr_label].values.tolist()
    auc = evaluate_auc(actual, preds)

    ### scikit learn decision tree
    # from sklearn.tree import DecisionTreeClassifier
    # model = DecisionTreeClassifier().fit(self.df_train[self.attributes], self.df_train[self.attr_label])
    # pred = model.predict_proba(self.df_test[self.attributes])
    # pred = [x[1] for x in pred]
    # auc = evaluate_auc(self.df_test[self.attr_label].values.tolist(), pred)

    return auc
def logit(self):
    """
    Train a Weka logistic regression on ``self.data_train`` and score
    ``self.data_test``.

    :return: AUC computed by ``evaluate_auc`` from the test labels and the
        distribution entry at index 1 of each test instance
    """
    model = Classifier(classname="weka.classifiers.functions.Logistic")
    model.build_classifier(self.data_train)
    preds = [model.distribution_for_instance(inst)[1]
             for inst in self.data_test]
    actual = self.df_test[self.attr_label].values.tolist()
    auc = evaluate_auc(actual, preds)

    ### scikit learn logit
    # from sklearn.linear_model import LogisticRegression
    # model = LogisticRegression().fit(self.df_train[self.attributes], self.df_train[self.attr_label])
    # pred = model.predict_proba(self.df_test[self.attributes])
    # pred = [x[1] for x in pred]
    # auc = evaluate_auc(self.df_test[self.attr_label].values.tolist(), pred)

    return auc
def functionProcessamento(self, ca1_r, ca1_l, ca2_ca3_r, ca2_ca3_l, sub_r,
                          sub_l, sexo, id):
    """
    Classify one individual with the stored model and save the result.

    A one-row ARFF file is written next to this module from the six
    measurements, classified with the serialized model, and the resulting
    percentages are stored on the ``Alzheimer`` record with primary key
    ``id`` (whose ``status_seg`` is set to 2).

    :param ca1_r: value written as ``ca1_right``
    :param ca1_l: value written as ``ca1_left``
    :param ca2_ca3_r: value written as ``ca2_ca3_right``
    :param ca2_ca3_l: value written as ``ca2_ca3_left``
    :param sub_r: value written as ``subic_right``
    :param sub_l: value written as ``subic_left``
    :param sexo: sex of the individual; ``"Male"`` is the only value checked
    :param id: primary key of the ``Alzheimer`` record to update
    """
    jvm.start()
    try:
        path = os.path.dirname(os.path.abspath(__file__))
        # os.path.join instead of a hard-coded "\\" keeps the Windows result
        # and also works on other platforms.
        # TODO: check the sex of the individual to load the right model
        modelo = os.path.join(path, "naive_bayes_feminino_novo.model")
        if sexo == "Male":
            print("É masculino")
            # NOTE(review): still the same model file as for the other case.
            modelo = os.path.join(path, "naive_bayes_feminino_novo.model")

        objects = serialization.read_all(modelo)
        classifier = Classifier(jobject=objects[0])
        loader = Loader(classname="weka.core.converters.ArffLoader")

        arff_path = os.path.join(path, "novo_individuo.arff")
        conteudo = [
            "@relation alzheimer \n\n",
            "@attribute doente {SIM, NAO} \n",
            "@attribute ca1_right real \n",
            "@attribute ca1_left real \n",
            "@attribute ca2_ca3_right real\n",
            "@attribute ca2_ca3_left real \n",
            "@attribute subic_right real \n",
            "@attribute subic_left real \n\n",
            "@data \n",
            # the measurements go here; the class value is a placeholder
            "SIM," + str(ca1_r) + "," + str(ca1_l) + "," + str(ca2_ca3_r) +
            "," + str(ca2_ca3_l) + "," + str(sub_r) + "," + str(sub_l),
        ]
        print(conteudo)
        # The context manager closes the file even when writing fails.
        with open(arff_path, "w") as arquivo:
            arquivo.writelines(conteudo)

        data = loader.load_file(arff_path)
        # NOTE(review): the header declares the class ("doente") FIRST, yet
        # the last attribute is selected as class here, and the classifier
        # output is used as a fraction below. Confirm against how the model
        # was trained before changing either.
        data.class_is_last()

        for inst in data:
            pred = classifier.classify_instance(inst)
            pc_doenca = round(((pred) * 100), 2)
            pc_saudavel = round(((100 - pc_doenca)), 2)
            print(" Porcentagem de alzheimer=" + str(pc_doenca) +
                  "%, porcentagem saudavel=" + str(pc_saudavel) + "%")
            alzheimer = Alzheimer.objects.get(id=id)
            alzheimer.resultado_ad = pc_doenca
            alzheimer.resultado_cn = pc_saudavel
            alzheimer.status_seg = 2
            alzheimer.save()
    finally:
        # Always release the JVM, also when loading or classifying fails.
        jvm.stop()
def run():
    """
    Convert the bank CSV to ARFF, dump a J48 tree and cross-validate J48.

    Steps: load the CSV and save it as ARFF; build J48 (``-C 0.5``) on the
    ARFF data and write its textual form to a ``.txt`` file; run a 10-fold
    cross-validation of J48 (``-C 0.25 -M 2``) with seed 100.

    :return: percentage of correctly classified instances of the
        cross-validation, as a string; the same value is stored in the
        module-level name ``j48``
    """
    global j48
    base = ("/Users/imeiliasantoso/web_graduate_project5/register_page/"
            "bank-full_input")

    jvm.start()
    try:
        load_csv = Loader("weka.core.converters.CSVLoader")
        data_csv = load_csv.load_file(base + ".csv")
        saver = Saver("weka.core.converters.ArffSaver")
        saver.save_file(data_csv, base + ".arff")

        load_arff = Loader("weka.core.converters.ArffLoader")
        data_arff = load_arff.load_file(base + ".arff")
        data_arff.class_is_last()

        cls = Classifier(classname="weka.classifiers.trees.J48",
                         options=["-C", "0.5"])
        cls.build_classifier(data_arff)
        # The former per-instance classification loop was dropped: its
        # results were never used.

        # save the pruned tree in a text file
        with open(base + ".txt", "w") as saveFile:
            saveFile.write(str(cls))

        J48_class = Classifier(classname="weka.classifiers.trees.J48",
                               options=["-C", "0.25", "-M", "2"])
        J48_class.build_classifier(data_arff)
        evaluationj48 = Evaluation(data_arff)
        evaluationj48.crossvalidate_model(J48_class, data_arff, 10,
                                          Random(100))
        j48 = str(evaluationj48.percent_correct)
    finally:
        # Always release the JVM, also when a step above fails.
        jvm.stop()
    return j48
def train_and_separate_validation(self, trainingSet, validationSet,
                                  validationInstancesNames, classifier):
    """
    Train on one ARFF file and predict every instance of a second one.

    :param trainingSet: path of the training ARFF file (class is last)
    :param validationSet: path of the validation ARFF file (class is last)
    :param validationInstancesNames: one name per validation instance, in
        file order
    :param classifier: Weka class name of the classifier to build
    :return: ``[header, dataMatrix]``; ``header`` holds two title rows and
        ``dataMatrix`` one row per validation instance:
        ``[name, <probability per class, rounded to 2 digits>, real class,
        guess]`` with guess 1.0 for a correct prediction, else 0.0
    :raises LookupError: when the number of names differs from the number of
        validation instances
    """
    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file(trainingSet)
    data.class_is_last()
    data2 = loader.load_file(validationSet)
    if len(data2) != len(validationInstancesNames):
        message = ("Theres a mismatch between the number of instances in "
                   "the arff file and the list of instance names.")
        print(message)
        raise LookupError(message)
    data2.class_is_last()

    classes = [str(code) for code in data.class_attribute.values]
    header = [[classifier, trainingSet, "", "", ""],
              ["Instance"] +
              [className + " probability" for className in classes] +
              ["Real Class", "Guess"]]

    cls = Classifier(classname=classifier)
    print("Training.")
    cls.build_classifier(data)
    print("Model done!")

    dataMatrix = []
    total = len(data2)
    print("Validating.")
    for index, inst in enumerate(data2):
        print("Instance: " + str(index + 1) + "/" + str(total))
        pred = cls.classify_instance(inst)
        actual = inst.get_value(inst.class_index)
        guessValue = 1.0 if actual == pred else 0.0
        dist = cls.distribution_for_instance(inst)
        # Build the row by concatenation so it lines up with the header for
        # any number of classes; the former fixed-width slice assignment
        # misplaced the real class and the guess unless there were exactly
        # two classes.
        dataMatrix.append([validationInstancesNames[index]] +
                          [round(p, 2) for p in dist] +
                          [classes[int(actual)], guessValue])
    print("Done\n")
    return [header, dataMatrix]
def getDecisionTree(self, inputPath):
    """
    Build a J48 tree (``-C 0.3``) from an ARFF file and return its graph.

    :param inputPath: path handed to ``self.load_Arff``
    :return: whatever ``classifier.graph()`` returns for the built tree
    """
    # load arff
    data = self.load_Arff(inputPath)
    # the last attribute is the class (this was previously set twice)
    data.set_class_index(data.num_attributes() - 1)

    classifier = Classifier(classname="weka.classifiers.trees.J48",
                            options=["-C", "0.3"])
    classifier.build_classifier(data)
    # The former string conversion of the model and the per-instance
    # distribution loop were dropped: their results were never used.
    return classifier.graph()
def bayes_classifier(features):
    """
    Classify one feature vector with Naive Bayes trained on
    ``caracteristicas.arff``.

    :param features: attribute values of the new instance, handed to
        ``Instance.create_instance``
    :return: class distribution of the new instance; index 0 is reported as
        "Apu" and index 1 as "Nelson"
    """
    # Load the dataset.
    instancias = load_any_file("caracteristicas.arff")
    # The last attribute is the class.
    instancias.class_is_last()

    # Train the Naive Bayes classifier on the stored characteristics.
    classifier = Classifier(classname="weka.classifiers.bayes.NaiveBayes")
    classifier.build_classifier(instancias)

    # Create a new instance from the extracted features.
    new_instance = Instance.create_instance(features)
    # Add the new instance to the dataset.
    instancias.add_instance(new_instance)
    # Attach the dataset to the new instance.
    new_instance.dataset = instancias

    # Probabilities of the new instance belonging to each class.
    classification = classifier.distribution_for_instance(new_instance)
    # Both values are shown as percentages; the second one used to be
    # printed as a raw fraction next to a percentage.
    print("Classificação", " - Apu: ", round(classification[0] * 100, 2),
          " Nelson: ", round(classification[1] * 100, 2))
    return classification
def getDecisionTree(self, inputPath):
    """
    Train a J48 decision tree (confidence factor option ``-C 0.3``) on the
    ARFF file at ``inputPath`` and return the tree's graph.

    :param inputPath: path handed to ``self.load_Arff``
    :return: result of ``classifier.graph()`` for the trained tree
    """
    data = self.load_Arff(inputPath)
    # Class attribute = last attribute; setting it once is enough.
    last_attribute = data.num_attributes() - 1
    data.set_class_index(last_attribute)

    classifier = Classifier(classname="weka.classifiers.trees.J48",
                            options=["-C", "0.3"])
    classifier.build_classifier(data)
    # No per-instance pass is needed: the distributions computed here before
    # were thrown away, as was the string form of the model.
    graph = classifier.graph()
    return graph
def classify(train, test, name="RF", tuning=False):
    """
    Train a Weka classifier on ``train`` and predict ``test``.

    ``train`` / ``test`` may both be lists (converted with
    ``weka_instance``), both be paths of existing files, or anything
    ``csv_as_ndarray`` accepts. The last attribute is the class.

    :param train: training data (see above)
    :param test: test data (see above)
    :param name: key into ``classifiers`` (looked up lower-cased)
    :param tuning: when true, options come from ``tune(train)`` instead of
        ``default_opt``
    :return: tuple ``(preds, distr)``: predicted class per test instance and
        the distribution entry at index 1 per test instance
    """
    jvm.start()
    try:
        if isinstance(train, list) and isinstance(test, list):
            train = weka_instance(train)
            trn_data = converters.load_any_file(train)
            test = weka_instance(test)
            tst_data = converters.load_any_file(test)
        elif os.path.isfile(train) and os.path.isfile(test):
            trn_data = converters.load_any_file(train)
            tst_data = converters.load_any_file(test)
        else:
            trn = csv_as_ndarray(train)
            tst = csv_as_ndarray(test)
            trn_data = converters.ndarray_to_instances(trn, relation="Train")
            tst_data = converters.ndarray_to_instances(tst, relation="Test")

        trn_data.class_is_last()
        tst_data.class_is_last()

        opt = tune(train) if tuning else default_opt

        cls = Classifier(classname=classifiers[name.lower()], options=opt)
        cls.build_classifier(trn_data)

        distr = [cls.distribution_for_instance(inst)[1] for inst in tst_data]
        preds = [cls.classify_instance(inst) for inst in tst_data]
    finally:
        # Always release the JVM, also when loading, tuning or training
        # fails.
        jvm.stop()
    return preds, distr
class ObjectiveClassifier:
    """Classify tweets with a serialized Weka model and print the outcome."""

    def __init__(self, model_path, senti_path, stop_words, ngrams_path):
        """
        :param model_path: path of the serialized Weka model
        :param senti_path: argument for the ``Preprocessor``
        :param stop_words: stop words handed to the preprocessor per tweet
        :param ngrams_path: argument for the ``FeaturesCalculator``
        """
        self.loader = Loader(classname="weka.core.converters.ArffLoader")
        self.features_calculator = FeaturesCalculator(ngrams_path)
        model_object = serialization.read(model_path)
        self.classifier = Classifier(jobject=model_object)
        self.normalizer = Preprocessor(senti_path)
        self.stop_words = stop_words

    def classify_tweet(self, tweet, polarity='"positive"'):
        """
        Preprocess ``tweet``, write its features to an ARFF file, classify
        every instance of that file and print one line per instance.

        :param tweet: text to classify
        :param polarity: value handed to the feature calculator
        """
        features_file = "output/tweet_features_objective.arff"
        normalized = self.normalizer.preprocess(tweet, self.stop_words, "")
        self.features_calculator.calculateFeatures(normalized, features_file,
                                                   polarity)
        tweet_features = self.loader.load_file(features_file)
        tweet_features.class_is_last()
        for number, inst in enumerate(tweet_features, start=1):
            pred = self.classifier.classify_instance(inst)
            dist = self.classifier.distribution_for_instance(inst)
            label = inst.class_attribute.value(int(pred))
            print("%d - %s - %s" % (number, label, str(dist.tolist())))
class J48:
    """J48-based intent classifier trained on ``./DataSet/chatbot2.arff``."""

    def __init__(self):
        """Start the JVM, load the dataset and train the tree."""
        jvm.start()
        data_dir = "./DataSet/"
        self.data = converters.load_any_file(data_dir + "chatbot2.arff")
        self.data.class_is_last()
        self.cls = Classifier(classname="weka.classifiers.trees.J48")
        self.cls.build_classifier(self.data)
        self.intens = self.data.attribute_by_name("intent")

    def transformUserInput(self, user_input):
        '''
        Turn the user input into a vector of '1'/'0' flags so that a
        prediction can be made.

        :param str user_input: text entered by the user
        :return: one entry per dataset attribute: '1' when the attribute
            name occurs as a word of the input, else '0'; the last entry
            (the class) is the missing value
        :rtype: list
        '''
        attributes = self.data.attribute_names()
        data_size = len(attributes)
        vector_input = ['0'] * data_size
        # Name -> position lookup; avoids scanning the name list per word.
        attribute_map = {name: i for i, name in enumerate(attributes)}
        for word in user_input.split():
            position = attribute_map.get(word)
            if position is not None:
                vector_input[position] = '1'
        vector_input[data_size - 1] = Instance.missing_value()
        return vector_input

    def getIntent(self, user_input):
        '''
        Identify the intent of a user input by classifying it with the tree.

        :param str user_input: text entered by the user
        :return: name of the predicted intent when the highest class
            probability exceeds 0.7, otherwise "desconocido"
        :rtype: str
        '''
        vector_input = self.transformUserInput(user_input)
        inst = Instance.create_instance(vector_input)
        # Attach the dataset header directly. Previously the instance was
        # appended to the training data and the whole dataset was classified
        # just to reach the appended copy, which grew self.data on every call
        # and cost one classification per stored instance.
        inst.dataset = self.data

        pred = int(self.cls.classify_instance(inst))
        dist = self.cls.distribution_for_instance(inst)
        if max(dist) > 0.7:
            return self.intens.value(pred)
        return "desconocido"
# Load the training set; the last attribute is used as the class.
instances = loader.load_file(
    "/home/farzad/Desktop/jrnl/semiSupervisedPython/originDataset/bupa/train.arff"
)
instances.class_is_last()
# Train a J48 decision tree on the training instances.
tree = Classifier(classname="weka.classifiers.trees.J48")
tree.build_classifier(instances)
# clsLabel = j48.classify_instance(data.get_instance(0))
# print("====================================>",clsLabel)
# One row per training instance: p_train receives a score derived from the
# tree's class distribution, y_train the label predicted by the tree.
p_train = np.zeros(shape=(instances.num_instances, 1))
y_train = np.zeros(shape=(instances.num_instances, 1))
for i, instance in enumerate(instances):
    dist = tree.distribution_for_instance(instance)
    # Rescale the distribution entry at index 1 with (x - 0.5) * 2.
    p_train[i] = [(dist[1] - 0.5) * 2.0]
    y_train[i] = [tree.classify_instance(instance)]
# Debug output. NOTE(review): all three lines carry the "p_train" label, but
# they print len(p_train), len(y_train) and the instance count respectively.
print("p_train ======> > > >>>> > > >>>> ", len(p_train))
print("p_train ======> > > >>>> > > >>>> ", len(y_train))
print("p_train ======> > > >>>> > > >>>> ", instances.num_instances)
# print("p_train ======> > > >>>> > > >>>> " , p_train)
# print("p_train ======> > > >>>> > > >>>> " , p_train.reshape( -1, 1 ))
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)
# Fit LR (presumably a logistic regression; confirm against the import) that
# maps the tree score to the tree's predicted label.
lr = LR(solver='lbfgs')
lr.fit(p_train, np.ravel(y_train, order='C'))  # LR needs X to be 2-dimensional
# lr.fit( p_train.reshape( -1, 1 ), y_train ) # LR needs X to be 2-dimensional
def python_wrapper(mImage, prefix, file_name, pre_prefix, dir, permanent_dir, model):
    """
    Run the full pipeline for one image: segmentation, INFLO anomaly
    detection (via a batch file), clustering, plotting and classification of
    the extracted features with a serialized Weka model.

    Python 2 code (print statements).

    NOTE(review): the block nesting was reconstructed from a flattened
    source. In particular, confirm whether the Weka step at the end belongs
    inside the ``else`` branch (as laid out here) or at function level.

    :param mImage: path of the RGB image; the IR and Mat paths are derived
        from it by splitting on 'RGB_' / '.png'
    :param prefix: output folder for this image (created when missing)
    :param file_name: base name used for the CSV and the plotted images
    :param pre_prefix: passed through to ``demo_printing_picture``
    :param dir: folder combined with ``file_name`` for the cluster analysis
    :param permanent_dir: folder holding ``output.csv``, ``output_INFLO.csv``
        and ``output_n.csv``
    :param model: path of the serialized Weka model
    """
    # Initialization of weka machine learning library
    weka_machine_learning = WML.WekaMachineLearning()
    # tokenization of images
    # NOTE(review): the '.' in the pattern is an unescaped regex wildcard.
    token = re.split('RGB_|.png', mImage)
    ir_directory = token[0] + 'IR_' + token[1] + '.pgm'
    mat_directory = token[0] + 'Mat_' + token[1]
    # get mat and ir image
    image = segmentor.getImage(ir_directory)
    mat = segmentor.readMatFile(mat_directory)
    # image processing
    edges = segmentor.edgeDetector(image)
    type = segmentor.getTypeOfFruit(image)
    segmentation = segmentor.segmentation(image, type)
    filter = segmentor.filterImageFromSegmentation(image, segmentation)
    output_seg = segmentor.imageMapping(filter, mat['IR'])
    ####################-Anomaly Detection via INFLO-###################
    # file prefix creation for the csv file to save
    prefix_csv = prefix + "\\" + file_name
    # if the folder is not there then create it
    # and write the csv to the folder
    if not os.path.exists(prefix):
        os.mkdir(prefix)
        csv = segmentor.writeToCSV(output_seg, prefix_csv)
        print "file is written"
    # else simply write the csv to the folder
    else:
        csv = segmentor.writeToCSV(output_seg, prefix_csv)
        print "file is written"
    # call the INFLO.bat after segmenting the image
    # for anomaly detection
    run_batch_file("rapid_miner_pro_ifruitlfy.bat")
    ############################-Clustering-############################
    # image file directory is stored in ir_directory
    # mat file directory is stored in mat_directory
    # and need to get the INFLO file
    # directory for INFLO file is prefix_csv
    anomaly_file = prefix_csv + '.csv_INFLO.csv'
    # directory for the temporary files is made so
    # some results can be stored and processed auto-
    # matically by the rapid miner 5, this folder is
    demo_printing_picture(permanent_dir, prefix, mImage, pre_prefix, dir, file_name)
    print(
        "END OF ANOMALY DETECTION CLICK TRAIN AND SHOW RESULT FOR PROCESSING")
    write_temp_dir = permanent_dir + "\\"
    print prefix
    print file_name
    # Clean the junk of the output files
    if os.path.exists(permanent_dir + "//output.csv"):
        os.remove(permanent_dir + "//output.csv")
    features = iFruitFly_clustering.cluster_analysis.cluster_analysis(
        ir_directory, permanent_dir + "\\output_INFLO.csv", mat_directory,
        dir + "\\" + file_name, prefix, file_name, permanent_dir)
    if (features == None):
        print("Image cant be segmented due to poor calibiration")
    # other files are stored for the user in the junk
    else:
        print "printing images->>>>>>> ", prefix + file_name
        image_plotter(features, ir_directory, prefix + file_name)
        # This import rebinds the local name ``csv`` assigned above.
        import csv
        # Weka Machine Learning Inclusion on 5/30/2017
        # adding one extra column: every data row of output.csv gets a
        # trailing 0 under a new 'result' header, written to output_n.csv
        with open(permanent_dir + "\\output.csv", 'r') as csvinput:
            with open(permanent_dir + "\\output_n.csv", 'w') as csvoutput:
                writer = csv.writer(csvoutput, lineterminator='\n')
                reader = csv.reader(csvinput)
                all = []
                row = next(reader)
                row.append('result')
                all.append(row)
                for row in reader:
                    row.append(0)
                    all.append(row)
                writer.writerows(all)
        #model = "J:\iFruitFly\Python Scripts\Model 1\\model.model"
        data_dir = permanent_dir + "\\output_n.csv"
        #data_dir_open = open(data_dir)
        #r = csv.reader(data_dir_open)
        # NOTE(review): the JVM is started here but never stopped in this
        # function.
        jvm.start()
        loader = Loader(classname="weka.core.converters.CSVLoader")
        data = loader.load_file(data_dir)
        # using the serialization library for
        # opening the model
        objects = serialization.read_all(model)
        classifier = Classifier(jobject=objects[0])
        print "Model Classified"
        print classifier
        # the appended 'result' column is the class
        data.class_is_last()
        for index, inst in enumerate(data):
            pred = classifier.classify_instance(inst)
            dist = classifier.distribution_for_instance(inst)
            print pred
def classifyTest(fileToClassify,
                 fileToCompare,
                 predictionYear=None,
                 pastResultYears=None,
                 classifier=None):
    """
    Train a multilayer perceptron on one CSV file, predict the last column
    of a second CSV file and hand both series to ``BuildGraph``.

    :param fileToClassify: CSV file used for training (class is last)
    :param fileToCompare: CSV file whose instances are predicted and whose
        last value is taken as the official result
    :param predictionYear: label used in the predicted series / title
    :param pastResultYears: label used in the official series
    :param classifier: accepted for compatibility; a new
        MultilayerPerceptron is always built and this argument is ignored
    """

    def ratio(predicted_sum, actual_sum):
        # Guard against an actual sum of 0 (or no instances at all).
        if not actual_sum:
            return float('nan')
        return predicted_sum / actual_sum

    # Start Java VM
    jvm.start(max_heap_size="1024m")
    try:
        # Load CSV files into weka loader
        loader = Loader(classname="weka.core.converters.CSVLoader")
        fileToClassifyData = loader.load_file(fileToClassify)
        fileToClassifyData.class_is_last()
        fileToCompareData = loader.load_file(fileToCompare)
        fileToCompareData.class_is_last()

        # Generate Classifier based on data
        classifier = Classifier(
            classname="weka.classifiers.functions.MultilayerPerceptron",
            options=[
                "-L", "0.3", "-M", "0.2", "-N", "500", "-V", "0", "-S", "0",
                "-E", "20", "-H", "a"
            ])
        classifier.build_classifier(fileToClassifyData)
        print(classifier)

        # Var builder for graph
        count = 0.0
        countPred = 0.0
        graphDetails = [
            ['TITLE'],
            ['NFL Data Ratings (Official) {0}'.format(pastResultYears), [],
             []],
            ['NFL Data Ratings (Predicted) {0}'.format(predictionYear), [],
             []]
        ]

        # Time to predict results based on classifier
        for index, inst in enumerate(fileToCompareData):
            pred = classifier.classify_instance(inst)
            # Materialise the instance values once instead of twice.
            values = list(inst)
            temp = values[-1]
            countPred += pred
            count += temp
            # 1-based position, used for the graph and for the log line (the
            # log line used to be numbered one too high).
            position = index + 1
            print('YOLO', values[3])
            # NOTE(review): this is the ratio of the summed predictions to
            # the summed official values, not a classification accuracy.
            print("{0:.3f} accurate compared to results.".format(
                ratio(countPred, count)))
            dist = classifier.distribution_for_instance(inst)
            # NFL Results
            graphDetails[1][1].append(position)
            graphDetails[1][2].append(temp)
            # Predicted Results
            graphDetails[2][1].append(position)
            graphDetails[2][2].append(pred)
            print(
                str(position) + ": label index=" + str(pred) +
                ", class distribution=" + str(dist) + " , original: " +
                str(temp))

        graphDetails[0][
            0] = 'Player Rating Predictions For {0} ({1:.3f} Accurate)'.format(
                predictionYear, 100 - ratio(countPred, count))
    finally:
        # Always release the JVM, also when loading or training fails.
        jvm.stop()
    print(graphDetails)
    BuildGraph(graphDetails)
class Weka(object):
    """
    Small convenience wrapper around a Weka classifier.

    Creating an instance starts the JVM. Output uses single-argument
    ``print(...)`` calls so the class behaves the same on Python 2 and 3.
    """

    # Loaded training data (set by initData).
    data = None
    # Folder that ARFF file names are resolved against.
    dataDir = None
    # Trained classifier (set by trainData).
    classifier = None

    def __init__(self, dataDir='.'):
        self.dataDir = dataDir
        jvm.start()

    # Initialize the data with the content of the ARFF file
    def initData(self, arrfFile):
        """Load ``dataDir/arrfFile``; its last attribute is the class."""
        arff_path = self.dataDir + '/' + arrfFile
        loader = Loader(classname="weka.core.converters.ArffLoader")
        print(arff_path)
        self.data = loader.load_file(arff_path)
        self.data.class_is_last()
        print('Carregando arquivo ' + arff_path)

    # Train the classifier
    def trainData(self, arrfFile=None, classname="weka.classifiers.trees.J48",
                  options=None):
        """
        Build ``classname`` on the loaded data.

        :param arrfFile: when given, loaded first via ``initData``
        :param classname: Weka class name of the classifier
        :param options: classifier options; defaults to ``["-C", "0.3"]``
        :return: ``None``; does nothing when no data is loaded
        """
        if options is None:
            # Fresh list per call instead of a shared mutable default.
            options = ["-C", "0.3"]
        if arrfFile is not None:
            self.initData(arrfFile)
        if self.data is None:
            return
        print('Contruindo classificador ' + str(classname) + ' ' +
              ' '.join(options))
        self.classifier = Classifier(classname=classname, options=options)
        self.classifier.build_classifier(self.data)

    # Classify the instances of an ARFF file
    def classify(self, predictFile):
        """
        Classify every instance of ``dataDir/predictFile``.

        :param predictFile: ARFF file name, relative to ``dataDir``
        :return: list with one integer class per instance, or ``[-1]`` when
            no data is loaded or no classifier has been trained
        """
        if self.data is None or self.classifier is None:
            return [-1]
        loader = Loader(classname="weka.core.converters.ArffLoader")
        predict_data = loader.load_file(self.dataDir + '/' + predictFile)
        predict_data.class_is_last()
        # The labels are cut out of the attribute's string form: 19 leading
        # characters and the last one are dropped, the rest is split on ','.
        # NOTE(review): presumably this relies on a fixed-length attribute
        # name and on labels made of a 7-character prefix plus an integer;
        # confirm against the ARFF files in use.
        values = str(predict_data.class_attribute)[19:-1].split(',')
        classes = []
        for inst in predict_data:
            prediction = self.classifier.distribution_for_instance(inst)
            cl = int(values[prediction.argmax()][7:])
            print('Classe: ' + str(cl))
            classes.append(cl)
        return classes

    # Run a cross-validation and show the results on standard output
    def crossValidate(self, arrfFile=None,
                      classname="weka.classifiers.trees.J48", options=None):
        """
        Run a 10-fold cross-validation (seed 1) and print the results.

        :param arrfFile: when given, loaded first via ``initData``
        :param classname: Weka class name of the classifier
        :param options: classifier options; defaults to ``["-C", "0.3"]``
        :return: ``None``; does nothing when no data is loaded
        """
        if options is None:
            # Fresh list per call instead of a shared mutable default.
            options = ["-C", "0.3"]
        if arrfFile is not None:
            self.initData(arrfFile)
        if self.data is None:
            return
        print('Classificador ' + str(classname) + ' ' + ' '.join(options))
        cls = Classifier(classname=classname, options=options)
        evl = Evaluation(self.data)
        evl.crossvalidate_model(cls, self.data, 10, Random(1))
        print(evl.percent_correct)
        print(evl.summary())
        print(evl.class_details())
def _owned_flags(wardrobe, garment_names):
    """Return a 1/0 flag per garment name: 1 when ``wardrobe[name]`` is non-empty."""
    return [0 if len(wardrobe[garment]) == 0 else 1 for garment in garment_names]


def _mask_unowned(owned, predictions):
    """Return the predicted scores with a 0 wherever the ``owned`` flag is 0."""
    return [predictions[i] if flag else 0 for i, flag in enumerate(owned)]


def _top_indices(scores, count):
    """Return ``count`` indices of ``scores`` picked by repeated arg-max.

    Each round scans for the largest score (the lowest index wins ties),
    records its index and sets that score to 0 before the next round.  Once
    only zeros remain, every further round yields index 0, so the result can
    contain duplicates.  ``scores`` itself is left untouched.
    """
    remaining = list(scores)
    picked = []
    for _ in range(count):
        best = 0
        for i in range(1, len(remaining)):
            if remaining[i] > remaining[best]:
                best = i
        picked.append(best)
        remaining[best] = 0
    return picked


def main():
    """Classify clothing using the stored classification models.

    Reads the wardrobe of the user ``name`` from the ``clothing`` table,
    obtains the feature values, asks the four serialized Weka models for a
    class distribution and stores the best scoring garment indices in the
    module level ``upper_indices``, ``lower_indices``, ``outer_indices`` and
    ``shoes_indices`` (5, 5, 3 and 4 entries respectively).
    """
    global stop_spinning, name, upper_clothing, lower_clothing, outer_clothing, shoes_clothing, upper_indices, lower_indices, outer_indices, shoes_indices

    FSM = ClothingFSM()
    # FSM.username_server()

    clothingdb = MySQLdb.connect(host="localhost",
                                 user="******",
                                 passwd="mypassword",  # Change to your SQL DB password
                                 db="userprofiles")
    try:
        cursor = clothingdb.cursor()
        cursor.execute("SELECT * FROM clothing")
        rows = cursor.fetchall()
    finally:
        # The rows are fully fetched, so the connection is no longer needed.
        clothingdb.close()

    name = "Study"

    # Populate clothing dictionaries with user's wardrobe.
    # Column 0 is compared with the user name and column 1 with the body
    # part; the body part selects which of columns 2-5 is used as the
    # dictionary key, and column 6 is the value appended.
    wardrobe_by_part = {
        "Upper Body": (upper_clothing, 2),
        "Lower Body": (lower_clothing, 3),
        "Outerwear": (outer_clothing, 4),
        "Shoes": (shoes_clothing, 5),
    }
    for row in rows:
        print(str(row[2]))
        print(str(row[6]))
        if str(row[0]) != name:
            continue
        target = wardrobe_by_part.get(str(row[1]))
        if target is None:
            continue
        wardrobe, type_column = target
        try:
            wardrobe[row[type_column]].append(row[6])
        except Exception:
            # Best effort: one bad row must not abort the whole run.
            print("Problem appending clothing to dictionary")

    print(" ".join(str(w) for w in (upper_clothing, lower_clothing,
                                    outer_clothing, shoes_clothing)))
    # FSM.received_user_info()

    # In final program, we will receive this information from database
    # Set to true or false if receiving features vs testing defaults
    receive_features = True

    # Initialised for both branches so the summary print below cannot hit an
    # unbound name when a feature holds a value without a Yes/No mapping.
    snowstring = ''
    rainstring = ''
    athleticstring = ''

    if not receive_features:
        # Example inputs from user/weather API
        features['casual_formal'] = 3
        # 5 is very comfortable 1 is not comfortable
        features['comfort'] = 3
        # 1 is not snowing 2 is light snow 3 is heavy snow
        features['snow'] = 1
        # 1 is not raining 3 is raining (no medium)
        features['rain'] = 3
        # If user is spending their time mostly outside, set warmth to
        # outside warmth. If not, set warmth.
        features['warmth'] = 1
        features['outside_warmth'] = 4
        # 1 is no 0 is yes
        features['athletic'] = 1
    else:
        FSM.features_server()

    warmth_att = Attribute.create_numeric("Warmth")
    comfort_att = Attribute.create_numeric("Comfort")
    casual_att = Attribute.create_numeric("Casual")
    rain_att = Attribute.create_numeric("Rain")
    snow_att = Attribute.create_numeric("Snow")
    athletic_att = Attribute.create_numeric("Athletic")
    upper_attributes = [warmth_att, casual_att, comfort_att, athletic_att]
    lower_attributes = [warmth_att, casual_att, comfort_att, athletic_att]
    outer_attributes = [warmth_att, casual_att, comfort_att, snow_att, rain_att]
    shoes_attributes = [casual_att, comfort_att, athletic_att]
    # NOTE(review): the datasets created here are never used afterwards.
    Instances.create_instances("upper_instances", upper_attributes, 0)
    Instances.create_instances("lower_instances", lower_attributes, 0)
    Instances.create_instances("outer_instances", outer_attributes, 0)
    Instances.create_instances("shoes_instances", shoes_attributes, 0)

    # Simulate their wardrobe: one ownership flag per garment type, in the
    # same order the prediction arrays are indexed with below.
    upper_array = _owned_flags(upper_clothing, (
        'Tank Top', 'T-Shirt', 'Long-sleeved Shirt', 'Athletic Top',
        'Button-down Shirt', 'Polo Shirt', 'Dress Shirt', 'Suit Jacket',
        'Blazer', 'Hoodie', 'Sweater', 'Blouse', 'Day Dress',
        'Evening Dress',
    ))
    lower_array = _owned_flags(lower_clothing, (
        'Shorts', 'Athletic Shorts', 'Athletic Pants', 'Jeans', 'Trousers',
        'Skirt', 'Dress Pants',
    ))
    outer_array = _owned_flags(outer_clothing, (
        'Light Jacket', 'Winter Jacket', 'Rain Jacket',
    ))
    shoes_array = _owned_flags(shoes_clothing, (
        'Casual Shoes', 'Athletic Shoes', 'Dress Shoes',
        'Business Casual Shoes',
    ))

    upper_list = [features['outside_warmth'], features['casual_formal'],
                  features['comfort'], features['athletic']]
    lower_list = [features['outside_warmth'], features['casual_formal'],
                  features['comfort'], math.fabs(1 - features['athletic'])]
    # NOTE(review): rain comes before snow here while outer_attributes lists
    # snow before rain - confirm which order the outer model was trained with.
    outer_list = [features['outside_warmth'], features['casual_formal'],
                  features['comfort'], features['rain'], features['snow']]
    shoes_list = [features['casual_formal'], features['comfort'],
                  math.fabs(1 - features['athletic'])]

    upper_instance = Instance.create_instance(upper_list, classname='weka.core.DenseInstance', weight=1.0)
    lower_instance = Instance.create_instance(lower_list, classname='weka.core.DenseInstance', weight=1.0)
    outer_instance = Instance.create_instance(outer_list, classname='weka.core.DenseInstance', weight=1.0)
    shoes_instance = Instance.create_instance(shoes_list, classname='weka.core.DenseInstance', weight=1.0)

    upper_path = '/home/leo/models/uppermodel2.model'
    lower_path = '/home/leo/models/lowermodel2.model'
    outer_path = '/home/leo/models/outermodel2.model'
    shoes_path = '/home/leo/models/shoesmodel7.model'

    upper_classifier = Classifier(jobject=serialization.read(upper_path))
    lower_classifier = Classifier(jobject=serialization.read(lower_path))
    outer_classifier = Classifier(jobject=serialization.read(outer_path))
    shoes_classifier = Classifier(jobject=serialization.read(shoes_path))

    upper_predictions = upper_classifier.distribution_for_instance(upper_instance)
    lower_predictions = lower_classifier.distribution_for_instance(lower_instance)
    outer_predictions = outer_classifier.distribution_for_instance(outer_instance)
    shoes_predictions = shoes_classifier.distribution_for_instance(shoes_instance)

    if features['rain'] == 1:
        rainstring = 'No'
    if features['rain'] == 3:
        rainstring = 'Yes'
    if features['snow'] == 1:
        snowstring = 'No'
    if features['snow'] == 3:
        snowstring = 'Yes'
    if features['athletic'] == 1:
        athleticstring = 'No'
    if features['athletic'] == 0:
        athleticstring = 'Yes'

    print("Features being Classified:")
    print(" ".join(str(part) for part in (
        "Outside Warmth:", features['outside_warmth'],
        "Inside-Outside:", features['inside_outside'],
        "Casual-Formal:", features['casual_formal'],
        "Comfort:", features['comfort'],
        "Athletic:", athleticstring,
        "Rain:", rainstring,
        "Snow:", snowstring,
    )))

    # Remove clothing options the user doesn't own, then keep the best
    # scoring indices of each classifier.
    upper_indices = _top_indices(_mask_unowned(upper_array, upper_predictions), 5)
    lower_indices = _top_indices(_mask_unowned(lower_array, lower_predictions), 5)
    outer_indices = _top_indices(_mask_unowned(outer_array, outer_predictions), 3)
    shoes_indices = _top_indices(_mask_unowned(shoes_array, shoes_predictions), 4)

    print("Outer Indices: " + str(outer_indices))
    FSM.received_inputs()
    print("Exiting Program")
class WekaEstimator(BaseEstimator, OptionHandler, RegressorMixin, ClassifierMixin):
    """
    Wraps a Weka classifier (classifier/regressor) within the scikit-learn framework.
    """

    def __init__(self, jobject=None, classifier=None, classname=None, options=None,
                 nominal_input_vars=None, nominal_output_var=None,
                 num_nominal_input_labels=None, num_nominal_output_labels=None):
        """
        Initializes the estimator. Can be either instantiated via the following priority of parameters:
        1. JB_Object representing a Java Classifier object
        2. Classifier pww3 wrapper
        3. classname/options

        :param jobject: the JB_Object representing a Weka classifier to use
        :type jobject: JB_Object
        :param classifier: the classifier wrapper to use
        :type classifier: Classifier
        :param classname: the classname of the Weka classifier to instantiate
        :type classname: str
        :param options: the command-line options of the Weka classifier to instantiate
        :type options: list
        :param nominal_input_vars: the list of 0-based indices of attributes to convert to nominal or range string with 1-based indices
        :type nominal_input_vars: list or str
        :param nominal_output_var: whether to convert the output variable to a nominal one
        :type nominal_output_var: bool
        :param num_nominal_input_labels: the dictionary with the number of labels for the nominal input variables (key is 0-based attribute index)
        :type num_nominal_input_labels: dict
        :param num_nominal_output_labels: the number of labels for the output variable
        :type num_nominal_output_labels: int
        """
        if jobject is not None:
            _jobject = jobject
        elif classifier is not None:
            _jobject = classifier.jobject
        elif classname is not None:
            if options is None:
                options = []
            classifier = Classifier(classname=classname, options=options)
            _jobject = classifier.jobject
        else:
            raise Exception("At least Java classname must be provided!")
        if not is_instance_of(_jobject, "weka.classifiers.Classifier"):
            raise Exception("Java object does not implement weka.classifiers.Classifier!")
        super(WekaEstimator, self).__init__(_jobject)
        self._classifier = Classifier(jobject=_jobject)
        self.header_ = None
        self.classes_ = None
        # the following references are required for get_params/set_params
        self._classname = classname
        self._options = options
        self._nominal_input_vars = nominal_input_vars
        self._nominal_output_var = nominal_output_var
        self._num_nominal_input_labels = num_nominal_input_labels
        self._num_nominal_output_labels = num_nominal_output_labels

    @property
    def classifier(self):
        """
        Returns the underlying classifier object, if any.

        :return: the classifier object
        :rtype: Classifier
        """
        return self._classifier

    @property
    def header(self):
        """
        Returns the underlying dataset header, if any.

        :return: the dataset structure
        :rtype: Instances
        """
        return self.header_

    def fit(self, data, targets):
        """
        Trains the estimator.

        :param data: the input variables as matrix, array-like of shape (n_samples, n_features)
        :type data: ndarray
        :param targets: the class attribute column, array-like of shape (n_samples,)
        :type targets: ndarray
        :return: itself
        :rtype: WekaEstimator
        """
        data, targets = check_X_y(data, y=targets, dtype=None)
        if self._nominal_input_vars is not None:
            data = to_nominal_attributes(data, self._nominal_input_vars)
        # the flag is documented as a bool: only convert when it is set, an
        # explicit False must not trigger the conversion
        if self._nominal_output_var:
            targets = to_nominal_labels(targets)
        d = to_instances(data, targets,
                         num_nominal_labels=self._num_nominal_input_labels,
                         num_class_labels=self._num_nominal_output_labels)
        self._classifier.build_classifier(d)
        # keep an empty copy of the dataset structure for building instances at prediction time
        self.header_ = d.template_instances(d, 0)
        if d.class_attribute.is_nominal:
            self.classes_ = d.class_attribute.values
        else:
            self.classes_ = None
        return self

    def predict(self, data):
        """
        Performs predictions with the trained classifier.

        :param data: the data matrix to generate predictions for, array-like of shape (n_samples, n_features)
        :type data: ndarray
        :return: the score (or scores)
        :rtype: ndarray
        """
        check_is_fitted(self)
        if self._nominal_input_vars is not None:
            data = to_nominal_attributes(data, self._nominal_input_vars)
        data = check_array(data, dtype=None)
        class_att = self.header_.class_attribute
        result = []
        for d in data:
            inst = to_instance(self.header_, d, missing_value())
            pred = self._classifier.classify_instance(inst)
            if class_att.is_nominal:
                # translate the predicted label index into the label itself
                result.append(class_att.value(int(pred)))
            else:
                result.append(pred)
        return np.array(result)

    def predict_proba(self, data):
        """
        Performs predictions and returns class probabilities.

        :param data: the data matrix to generate predictions for, array-like of shape (n_samples, n_features)
        :type data: ndarray
        :return: the probabilities
        :rtype: ndarray
        """
        check_is_fitted(self)
        if self._nominal_input_vars is not None:
            data = to_nominal_attributes(data, self._nominal_input_vars)
        data = check_array(data, dtype=None)
        result = []
        for d in data:
            inst = to_instance(self.header_, d, missing_value())
            result.append(self._classifier.distribution_for_instance(inst))
        return np.array(result)

    def get_params(self, deep=True):
        """
        Returns the parameters for this classifier, basically classname and options list.
        The nominal conversion settings are only included when they are set.

        :param deep: ignored
        :type deep: bool
        :return: the dictionary with options
        :rtype: dict
        """
        result = dict()
        result["classname"] = self._classname
        result["options"] = self._options
        if self._nominal_input_vars is not None:
            result["nominal_input_vars"] = self._nominal_input_vars
        if self._nominal_output_var is not None:
            result["nominal_output_var"] = self._nominal_output_var
        if self._num_nominal_input_labels is not None:
            result["num_nominal_input_labels"] = self._num_nominal_input_labels
        if self._num_nominal_output_labels is not None:
            result["num_nominal_output_labels"] = self._num_nominal_output_labels
        return result

    def set_params(self, **params):
        """
        Sets the options for the classifier, expects 'classname' and 'options'.
        A new classifier is instantiated from them; the nominal conversion
        settings that are not present in the parameters get reset to None.

        :param params: the parameter dictionary
        :type params: dict
        :return: itself, following the scikit-learn convention
        :rtype: WekaEstimator
        """
        if len(params) == 0:
            return self
        if "classname" not in params:
            raise Exception("Cannot find 'classname' in parameters!")
        if "options" not in params:
            raise Exception("Cannot find 'options' in parameters!")
        self._classname = params["classname"]
        self._options = params["options"]
        self._classifier = Classifier(classname=self._classname, options=self._options)
        self._nominal_input_vars = params.get("nominal_input_vars")
        self._nominal_output_var = params.get("nominal_output_var")
        self._num_nominal_input_labels = params.get("num_nominal_input_labels")
        self._num_nominal_output_labels = params.get("num_nominal_output_labels")
        return self

    def __str__(self):
        """
        For printing the model.

        :return: the model representation, if any
        :rtype: str
        """
        if self._classifier is None:
            # the classname can be None when the estimator was built from a Java object
            return str(self._classname) + ": No model built yet"
        else:
            return str(self._classifier)

    def __copy__(self):
        """
        Creates a deep copy of itself.

        :return: the copy
        :rtype: WekaEstimator
        """
        result = WekaEstimator(jobject=deepcopy(self.jobject))
        result._classname = self._classname
        # the options are None when the estimator was built from a Java object or wrapper
        result._options = None if (self._options is None) else self._options[:]
        result._nominal_input_vars = None if (self._nominal_input_vars is None) else self._nominal_input_vars[:]
        result._nominal_output_var = self._nominal_output_var
        result._num_nominal_input_labels = None if (self._num_nominal_input_labels is None) else dict(self._num_nominal_input_labels)
        result._num_nominal_output_labels = self._num_nominal_output_labels
        return result

    def __repr__(self, N_CHAR_MAX=700):
        """
        Returns a valid Python string using its classname and options.

        :param N_CHAR_MAX: ignored
        :type N_CHAR_MAX: int
        :return: the representation
        :rtype: str
        """
        # a range string has to be quoted, a list of indices must not be
        if isinstance(self._nominal_input_vars, str):
            template = "WekaEstimator(classname='%s', options=%s, nominal_input_vars='%s', nominal_output_var=%s)"
        else:
            template = "WekaEstimator(classname='%s', options=%s, nominal_input_vars=%s, nominal_output_var=%s)"
        return template % (self._classifier.classname,
                           str(self._classifier.options),
                           str(self._nominal_input_vars),
                           str(self._nominal_output_var))
class SklearnWekaWrapper(object):
    """Expose a Weka classifier through ``fit``/``predict``/``predict_proba``.

    Labels are encoded as ordinal codes (position of the label among the
    sorted unique labels) and stored in a nominal class attribute named
    ``tag``; ``self._dict`` maps each code back to the original label.
    """

    def __init__(self, class_name, options=None):
        """Instantiate the Weka classifier.

        :param class_name: Weka class name of the classifier
        :param options: optional whitespace separated option string
        """
        if options is not None:
            self._classifier = Classifier(classname=class_name, options=options.split())
        else:
            self._classifier = Classifier(classname=class_name)

    def fit(self, training_set, ground_through):
        """Train the classifier.

        :param training_set: feature matrix, array-like of shape (n_samples, n_features)
        :param ground_through: one label per sample
        :return: itself
        """
        self.ground_through = ground_through
        dataset = self._sklearn2weka(training_set, ground_through)
        dataset.class_is_last()
        self._classifier.build_classifier(dataset)
        return self

    def predict(self, testing_set):
        """Predict the label of every row of ``testing_set``.

        Requires a previous ``fit`` (the label mapping is created there).

        :param testing_set: feature matrix, array-like of shape (n_samples, n_features)
        :return: array with the predicted labels
        """
        dataset = self._sklearn2weka(testing_set)
        dataset.class_is_last()
        codes = [self._classifier.classify_instance(inst) for inst in dataset]
        # Translate the predicted label codes back into the original labels.
        return np.array([self._dict.get(code) for code in codes])

    def predict_proba(self, testing_set):
        """Return the class distribution for every row of ``testing_set``.

        Requires a previous ``fit`` (the label mapping is created there).

        :param testing_set: feature matrix, array-like of shape (n_samples, n_features)
        :return: array with one distribution per row
        """
        dataset = self._sklearn2weka(testing_set)
        dataset.class_is_last()
        return np.array([self._classifier.distribution_for_instance(inst)
                         for inst in dataset])

    def _sklearn2weka(self, features, labels=None):
        """Convert a feature matrix (and optionally its labels) to a Weka dataset.

        With ``labels`` the code-to-label mapping is rebuilt and every
        instance gets its class value set.  Without ``labels`` the mapping
        from the last ``fit`` defines the class attribute and the class
        values are left as created by ``insert_attribute``.

        :param features: feature matrix, array-like of shape (n_samples, n_features)
        :param labels: optional labels, one per row of ``features``
        :return: the dataset with the class attribute appended as last column
        """
        features = np.ascontiguousarray(features, dtype=np.float64)
        class_index = features.shape[1]

        codes = None
        if labels is not None:
            # Ordinal encoding: each label becomes its position among the
            # sorted unique labels.
            flat_labels = np.array(labels).reshape(-1)
            _, inverse = np.unique(flat_labels, return_inverse=True)
            codes = inverse.reshape(-1).astype(np.float64)
            mapping = {}
            for label, code in zip(labels, codes):
                mapping.setdefault(float(code), label)
            self._dict = mapping

        weka_dataset = ndarray_to_instances(features, 'weka_dataset')
        tag_labels = [str(float(i)) for i in range(len(self._dict))]
        weka_dataset.insert_attribute(Attribute.create_nominal('tag', tag_labels),
                                      class_index)
        if codes is not None:
            for index, inst in enumerate(weka_dataset):
                inst.set_value(class_index, float(codes[index]))
                weka_dataset.set_instance(index, inst)
        return weka_dataset
#df.drop(['id'],1,inplace=True)
from weka.classifiers import Classifier, Evaluation
from weka.core.classes import Random

# Trains a J48 tree on ``data`` and prints the prediction for every instance
# of ``dataTestResolutionChange``.  Both datasets are loaded earlier in the
# script (#data = ... # previously loaded data).
try:
    data.class_is_last()  # set class attribute
    dataTestResolutionChange.class_is_last()

    classifier = Classifier(classname="weka.classifiers.trees.J48", options=["-C", "0.3"])

    # evaluation = Evaluation(data)  # initialize with priors
    # evaluation.crossvalidate_model(classifier, iris_data, 10, Random(42))  # 10-fold CV
    classifier.build_classifier(data)

    for index, inst in enumerate(dataTestResolutionChange):
        pred = classifier.classify_instance(inst)
        dist = classifier.distribution_for_instance(inst)
        print(str(pred))
        #print index
        print(inst)
        print(str(index + 1) + ": label index=" + str(pred) + ", class distribution=" + str(dist))

    # print(evaluation.summary())
    # print("pctCorrect: " + str(evaluation.percent_correct))
    # print("incorrect: " + str(evaluation.incorrect))
finally:
    # Stop the JVM even when training or prediction fails.
    jvm.stop()
def classify(fileToClassify,
             fileToCompare,
             predictionYear=None,
             pastResultYears=None,
             prefix="NFL",
             classifierFunction=None):
    """Train a Weka function classifier on one CSV file and chart its predictions for another.

    The classifier is built on ``fileToClassify``; every instance of
    ``fileToCompare`` is then predicted, and the actual (last attribute) and
    predicted values are collected and handed to ``BuildGraph``.

    :param fileToClassify: CSV file used for training (class is the last column)
    :param fileToCompare: CSV file whose instances are predicted (class is the last column)
    :param predictionYear: iterable whose items are concatenated into the
        label of the predicted series; ``None`` gives an empty label
    :param pastResultYears: iterable whose items are joined with ``-`` into
        the label of the official series; ``None`` gives an empty label
    :param prefix: text prepended to both series titles
    :param classifierFunction: two item sequence ``[name, options]`` where
        ``name`` is a class in ``weka.classifiers.functions``; defaults to
        ``LinearRegression`` with
        ``["-S", "0", "-R", "1.0E-8", "-num-decimal-places", "4"]``
    """
    # A fresh list per call instead of a shared mutable default argument.
    if classifierFunction is None:
        classifierFunction = [
            "LinearRegression",
            ["-S", "0", "-R", "1.0E-8", "-num-decimal-places", "4"],
        ]

    # The declared default for both year arguments is None, which must not crash.
    predictionYear = "".join(map(str, predictionYear or ()))
    pastResultYears = "-".join(map(str, pastResultYears or ()))

    graphDetails = [
        ['TITLE'],
        ['{1} Data Ratings (Official) {0}'.format(pastResultYears, prefix), [], []],
        ['{1} Data Ratings (Predicted) {0}'.format(predictionYear, prefix), [], []],
    ]

    # Start Java VM
    jvm.start(max_heap_size="1024m")
    try:
        # Load CSV files into weka loader
        loader = Loader(classname="weka.core.converters.CSVLoader")
        fileToClassifyData = loader.load_file(fileToClassify)
        fileToClassifyData.class_is_last()
        fileToCompareData = loader.load_file(fileToCompare)
        fileToCompareData.class_is_last()

        # Generate Classifier based on data
        classifier = Classifier(
            classname="weka.classifiers.functions.{}".format(classifierFunction[0]),
            options=classifierFunction[1])
        classifier.build_classifier(fileToClassifyData)
        print(classifier)

        # Running sums of the actual and the predicted values.
        count = 0.0
        countPred = 0.0

        # Time to predict results based on classifier
        for position, inst in enumerate(fileToCompareData, start=1):
            pred = classifier.classify_instance(inst)
            # Value of the last attribute, i.e. the class set above.
            temp = list(inst)[-1]
            countPred += pred
            count += temp
            # NOTE(review): this "accuracy" is the ratio of the summed
            # predictions to the summed actual values - confirm that this is
            # the intended measure.
            ratio = countPred / count if count else float("nan")
            print("{0:.3f} accurate compared to results.".format(ratio))
            dist = classifier.distribution_for_instance(inst)

            # Official results
            graphDetails[1][1].append(position)
            graphDetails[1][2].append(temp)
            # Predicted results
            graphDetails[2][1].append(position)
            graphDetails[2][2].append(pred)

            print(str(position) + ": label index=" + str(pred) +
                  ", class distribution=" + str(dist) + " , original: " +
                  str(temp))

        ratio = countPred / count if count else float("nan")
        graphDetails[0][0] = 'Player Rating Predictions For {0} ({1:.3f} Accurate)'.format(
            predictionYear, 100 - ratio)
    finally:
        # Stop the JVM even when loading, training or prediction fails.
        jvm.stop()

    BuildGraph(graphDetails)
from weka.classifiers import Classifier
from weka.core.converters import Loader

# Trains a J48 tree on an ARFF file, prints the prediction for every
# instance and collects in ``tempList`` the (stringified) indices of the
# instances predicted as label index 0.
tempList = list()

jvm.start()
try:
    # Raw strings keep the backslashes of the Windows path literal; "\U"
    # in a normal string literal is a syntax error on Python 3.
    data_dir = r"C:\Users\Softmints\Desktop\Diss\Code\WEKA"

    # Prepare ARFF Loader
    loader = Loader(classname="weka.core.converters.ArffLoader")
    # Assign and load ARFF data file
    data = loader.load_file(data_dir + r"\TestDataEleventoTwentyTwo.arff")
    data.class_is_last()

    # Classify data using J48 classifier
    cls = Classifier(classname="weka.classifiers.trees.J48", options=["-C", "0.3"])
    cls.build_classifier(data)

    for index, inst in enumerate(data):
        # Output prediction and distribution
        pred = cls.classify_instance(inst)
        dist = cls.distribution_for_instance(inst)
        print(str(index) + ": label index=" + str(pred) + ", class distribution=" + str(dist))
        if str(pred) == "0.0":
            tempList.append(str(index))

    print(tempList)
finally:
    # Stop the JVM even when loading, training or prediction fails.
    jvm.stop()
def perceptron_classifier(cls, features, settings):
    """Classify one feature vector with a Weka MultilayerPerceptron.

    The classifier is trained on ``./src/results/caracteristicas_sounds.arff``
    and additionally evaluated with a 20-fold cross-validation whose summary
    is printed.

    :param cls: not used by this function
    :param features: attribute values of the instance to classify, passed to
        ``Instance.create_instance``
    :param settings: mapping providing ``'learningRate'`` and ``'trainingTime'``
    :return: dict with the keys ``'cat'`` and ``'dog'`` holding the first and
        second entry of the class distribution as percentages rounded to two
        decimals
    """
    # Load the dataset; the last attribute is the class.
    loader = Loader("weka.core.converters.ArffLoader")
    instancias = loader.load_file("./src/results/caracteristicas_sounds.arff")
    instancias.class_is_last()

    # Parameters
    learning_rate = str(settings['learningRate'])
    training_time = str(settings['trainingTime'])
    momentum = "0.2"
    hidden_layers = "a"
    seed = 2
    cross_validation = 20
    print('Learning Rate', learning_rate)
    print('Training Time', training_time)

    # Multilayer Perceptron configured with the parameters above.
    classifier = Classifier(
        classname="weka.classifiers.functions.MultilayerPerceptron",
        options=[
            "-L", learning_rate,
            "-M", momentum,
            "-N", training_time,
            "-V", "0",
            "-S", str(seed),
            "-E", "20",
            "-H", hidden_layers,
        ])

    # Model used for the final prediction: trained on the complete dataset.
    classifier.build_classifier(instancias)
    evaluation = Evaluation(instancias)

    # Cross-validation on a shuffled (and, for a nominal class, stratified) copy.
    rnd = Random(seed)
    rand_data = Instances.copy_instances(instancias)
    rand_data.randomize(rnd)
    if rand_data.class_attribute.is_nominal:
        rand_data.stratify(cross_validation)
    for i in range(cross_validation):
        # The folds must come from the randomized copy; taking them from the
        # original dataset would ignore the shuffling and stratification.
        train = rand_data.train_cv(cross_validation, i)
        test = rand_data.test_cv(cross_validation, i)
        # A fresh copy per fold keeps the full-data model above untouched.
        fold_classifier = Classifier.make_copy(classifier)
        fold_classifier.build_classifier(train)
        evaluation.test_model(fold_classifier, test)

    # Build an instance from the extracted features and add it to the dataset.
    new_instance = Instance.create_instance(features)
    instancias.add_instance(new_instance)
    # Give the instance the attribute structure of the dataset the model was
    # trained on (rather than of a leftover cross-validation fold).
    new_instance.dataset = instancias

    # Class probabilities for the new instance.
    classification = classifier.distribution_for_instance(new_instance)
    # NOTE(review): assumes the class labels are declared in the order
    # cat, dog in the ARFF file - confirm.
    result = {
        'cat': round(classification[0] * 100, 2),
        'dog': round(classification[1] * 100, 2)
    }

    print("=== Setup ===")
    print("Classifier: " + classifier.to_commandline())
    print("Dataset: " + instancias.relationname)
    print("Cross Validation: " + str(cross_validation) + "folds")
    print("Seed: " + str(seed))
    print("")
    print(evaluation.summary("=== " + str(cross_validation) + " -fold Cross-Validation ==="))
    print("Classificação", " - Gato: ", result['cat'], " Cachorro: ", result['dog'])
    return result
def predictionFromModel():
    """Write gender and age predictions of the serialized models to CSV files.

    For each task the test ARFF file and the serialized model are read from
    ``inputModel``; every instance is classified and one row per recognised
    prediction is written to a CSV file in ``outputPrediction``.  The author
    id of a row is taken from ``my_list`` at the position of the instance.
    The process is terminated with ``os._exit(0)`` afterwards.
    """
    import weka.core.serialization as serialization
    from weka.classifiers import Classifier

    loader = Loader(classname="weka.core.converters.ArffLoader")

    def write_predictions(arff_name, model_name, csv_name, column, labels):
        """Classify ``arff_name`` with ``model_name`` and write ``csv_name``.

        ``labels`` maps the textual prediction (e.g. ``'0.0'``) to the value
        written into ``column``; instances with any other prediction get no
        row.
        """
        data = loader.load_file(os.path.join(inputModel, arff_name))
        data.class_is_last()
        objects = serialization.read_all(os.path.join(inputModel, model_name))
        trained_model = Classifier(jobject=objects[0])

        with open(os.path.join(outputPrediction, csv_name), 'w') as out_file:
            writer = csv.DictWriter(out_file,
                                    fieldnames=['Test_Author_Profile_Id', column])
            writer.writeheader()
            for index, inst in enumerate(data):
                pred = trained_model.classify_instance(inst)
                dist = trained_model.distribution_for_instance(inst)
                print(str(index + 1) + ": label index=" + str(pred) +
                      ", class distribution=" + str(dist))
                label = labels.get(str(pred))
                if label is not None:
                    writer.writerow({
                        'Test_Author_Profile_Id': my_list[index],
                        column: label
                    })

    write_predictions("genderTest.arff", "GenderModel.model",
                      'Gender_Predictions.csv', 'Gender',
                      {'0.0': 'male', '1.0': 'female'})
    write_predictions("ageTest.arff", "AgeModel.model",
                      'Age_Predictions.csv', 'Age',
                      {'0.0': '15-19', '1.0': '20-24', '2.0': '25-xx'})

    # Hard exit without the normal interpreter shutdown; both CSV files have
    # already been closed by their with-blocks at this point.
    os._exit(0)
class Weka(object):
    """Small helper around python-weka-wrapper for ARFF based classification.

    Holds one loaded data set and one trained classifier at a time.
    Constructing an instance starts the JVM.
    """

    # Data set loaded by initData(); None until a file has been loaded.
    data = None
    # Directory that ARFF file names are resolved against.
    dataDir = None
    # Classifier built by trainData(); None until training has run.
    classifier = None

    def __init__(self, dataDir='.'):
        """Remember the data directory and start the JVM.

        :param dataDir: directory containing the ARFF files
        """
        self.dataDir = dataDir
        jvm.start()

    def initData(self, arrfFile):
        """Load ``arrfFile`` from ``dataDir`` into ``self.data``.

        The last attribute of the file is used as the class attribute.

        :param arrfFile: ARFF file name relative to ``dataDir``
        """
        loader = Loader(classname="weka.core.converters.ArffLoader")
        path = self.dataDir + '/' + arrfFile
        print(path)
        self.data = loader.load_file(path)
        self.data.class_is_last()
        print('Carregando arquivo ' + path)

    def trainData(self, arrfFile=None, classname="weka.classifiers.trees.J48",
                  options=None):
        """Train a classifier on the loaded data.

        :param arrfFile: optional ARFF file to load first
        :param classname: Weka classifier class name
        :param options: list of Weka option strings; ``None`` means
            ``["-C", "0.3"]``
        :return: None; does nothing when no data is loaded
        """
        # None replaces the former mutable list default.
        if options is None:
            options = ["-C", "0.3"]
        if arrfFile is not None:
            self.initData(arrfFile)
        if self.data is None:
            return
        print('Contruindo classificador ' + str(classname) + ' ' +
              ' '.join(options))
        self.classifier = Classifier(classname=classname, options=options)
        self.classifier.build_classifier(self.data)

    def classify(self, predictFile):
        """Classify every instance of an ARFF file.

        :param predictFile: ARFF file name relative to ``dataDir``
        :return: list with one int per instance, or ``[-1]`` when no data has
            been loaded or no classifier has been trained
        """
        if self.data is None or self.classifier is None:
            return [-1]

        loader = Loader(classname="weka.core.converters.ArffLoader")
        predict_data = loader.load_file(self.dataDir + '/' + predictFile)
        predict_data.class_is_last()

        # The nominal values are cut out of the attribute's string form by
        # dropping the first 19 characters and the last one.
        # NOTE(review): presumably that form is "@attribute <name> {a,b,...}"
        # with a 6-character attribute name -- confirm against the ARFF files.
        values = str(predict_data.class_attribute)[19:-1].split(',')

        classes = []
        for inst in predict_data:
            prediction = self.classifier.distribution_for_instance(inst)
            # Take the most probable value, drop its first 7 characters and
            # read the remainder as an integer.
            # NOTE(review): assumes every value is a 7-character prefix
            # followed by digits -- verify.
            classes.append(int(values[prediction.argmax()][7:]))
        return classes

    def crossValidate(self, arrfFile=None,
                      classname="weka.classifiers.trees.J48", options=None):
        """Run a 10-fold cross-validation and print the results.

        :param arrfFile: optional ARFF file to load first
        :param classname: Weka classifier class name
        :param options: list of Weka option strings; ``None`` means
            ``["-C", "0.3"]``
        :return: None; does nothing when no data is loaded
        """
        # None replaces the former mutable list default.
        if options is None:
            options = ["-C", "0.3"]
        if arrfFile is not None:
            self.initData(arrfFile)
        if self.data is None:
            return

        print('Classificador ' + str(classname) + ' ' + ' '.join(options))
        cls = Classifier(classname=classname, options=options)
        evl = Evaluation(self.data)
        evl.crossvalidate_model(cls, self.data, 10, Random(1))

        print(evl.percent_correct)
        print(evl.summary())
        print(evl.class_details())
class SklearnWekaWrapper(object):
    """Weka classifier exposed through fit / predict / predict_proba.

    ``set_oracle`` must be called before ``predict``: the oracle labels are
    what ``predict`` passes along when building the Weka test set.
    """

    # Short name -> (Weka class name, option string or None).
    _CLASSIFIERS = {
        'wrf': ('weka.classifiers.trees.RandomForest', None),
        'wj48': ('weka.classifiers.trees.J48', None),
        'wnb': ('weka.classifiers.bayes.NaiveBayes', '-D'),
        'wbn': ('weka.classifiers.bayes.BayesNet',
                '-D -Q weka.classifiers.bayes.net.search.local.TAN -- -S BAYES -E weka.classifiers.bayes.net.estimate.SimpleEstimator -- -A 0.5'),
        # Implementation of one-class SVM used in Anomaly Detection mode
        'wsv': ('weka.classifiers.functions.LibSVM', '-S 2'),
    }

    def __init__(self, classifier_name):
        """Create the Weka classifier selected by ``classifier_name``.

        :param classifier_name: one of 'wrf', 'wj48', 'wnb', 'wbn', 'wsv';
            any other value selects RandomForest without options
        """
        self.proba = None
        class_name, options = self._CLASSIFIERS.get(
            classifier_name, ('weka.classifiers.trees.RandomForest', None))
        if options is not None:
            self._classifier = Classifier(classname=class_name,
                                          options=options.split())
        else:
            self._classifier = Classifier(classname=class_name)
        self.model_ = None

    def fit(self, training_set, ground_truth):
        """Train the classifier; the build time is stored in ``self.tr_``.

        :param training_set: feature matrix
        :param ground_truth: one label per row of ``training_set``
        :return: self
        """
        self.ground_truth = ground_truth
        training_set = self._sklearn2weka(training_set, self.ground_truth)
        training_set.class_is_last()
        start = time()
        self._classifier.build_classifier(training_set)
        self.tr_ = time() - start
        self.model_ = self._classifier
        return self

    def predict(self, testing_set):
        """Predict a label for every row of ``testing_set``.

        The class distributions are kept in ``self.proba`` and the summed
        time spent in ``classify_instance`` in ``self.te_``.

        :param testing_set: feature matrix
        :return: numpy array of labels, mapped back through ``self._dict``
        """
        testing_set = self._sklearn2weka(testing_set, self.oracle)
        testing_set.class_is_last()
        preds = []
        dists = []
        elapsed = 0
        for inst in testing_set:
            # Only the classify call is timed, not the distribution call.
            start = time()
            pred = self._classifier.classify_instance(inst)
            elapsed += time() - start
            dists.append(self._classifier.distribution_for_instance(inst))
            preds.append(pred)
        preds = np.vectorize(self._dict.get)(preds)
        self.proba = dists
        self.te_ = elapsed
        return np.array(preds)

    def predict_proba(self, testing_set):
        """Return the class distributions computed by ``predict``.

        NOTE(review): once ``self.proba`` is set it is returned as is and
        ``testing_set`` is ignored, so a different test set gets the
        distributions of the previous one -- confirm this is intended.

        :param testing_set: feature matrix; only used when nothing is cached
        :return: list holding one class distribution per instance
        """
        if self.proba is None:
            self.predict(testing_set)
        return self.proba

    def set_oracle(self, oracle):
        """Store the labels that ``predict`` passes to ``_sklearn2weka``."""
        self.oracle = oracle

    def _sklearn2weka(self, features, labels=None):
        """Convert a feature matrix plus labels into a Weka data set.

        A nominal attribute named 'tag' is appended after the last feature
        column and filled with the ordinal-encoded labels.

        :param features: 2-D numeric array
        :param labels: one label per row of ``features``
        :return: the Weka data set
        """
        # All weka datasets have to be a zero-based coding for the column of labels
        # We can use non-aligned labels for training and testing because the labels
        # in testing phase are only used to obtain performance, but not for preds.
        # We compute performance off-line.
        labels_encoder = OrdinalEncoder()
        labels_nominal = labels_encoder.fit_transform(
            np.array(labels).reshape(-1, 1))
        labels_column = np.reshape(labels_nominal,
                                   [labels_nominal.shape[0], 1])

        # TODO: find another way to do the same
        # The following assigns _dict only once, i.e. in the training phase.
        if not hasattr(self, '_dict') and labels is not None:
            mapping = {}
            for label, nominal in zip(labels, labels_nominal):
                # Keep the first original label seen for each encoded value.
                mapping.setdefault(nominal.item(0), label)
            self._dict = mapping

        # np.float64 replaces the np.float_ alias, which NumPy 2.0 removed.
        weka_dataset = ndarray_to_instances(
            np.ascontiguousarray(features, dtype=np.float64), 'weka_dataset')
        weka_dataset.insert_attribute(
            Attribute.create_nominal(
                'tag', [str(float(i)) for i in range(len(self._dict))]),
            features.shape[1])

        if labels is not None:
            try:
                for index, inst in enumerate(weka_dataset):
                    inst.set_value(features.shape[1], labels_column[index])
                    weka_dataset.set_instance(index, inst)
            except TypeError as e:
                print('Error: it seems InstanceIterator does not implement a valid iterator.')
                print('Please, check the class definition in lib/python3.7/site-packages/weka/core/dataset.py.')
                print('This error could be due to the next() method: it should be declared as __next__().')
                exit()
        return weka_dataset
from weka.core.converters import Loader
import weka.core.jvm as jvm
from weka.classifiers import Classifier

# Train a J48 tree on games.arff and print, for every training instance, the
# predicted label index and the class distribution.
jvm.start()
try:
    # Alternative input format:
    # loader = Loader(classname="weka.core.converters.JSONLoader")
    # data = loader.load_file("test.json")
    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file("games.arff")
    data.class_is_last()  # the last attribute is the class

    print(data)

    cls = Classifier(classname="weka.classifiers.trees.J48",
                     options=["-C", "0.3"])
    cls.build_classifier(data)

    for index, inst in enumerate(data):
        pred = cls.classify_instance(inst)
        dist = cls.distribution_for_instance(inst)
        print(
            str(index + 1) + ": label index=" + str(pred) +
            ", class distribution=" + str(dist))
finally:
    # Stop the JVM even when loading, training or prediction raises.
    jvm.stop()
class SklearnWekaWrapper(object):
    """Weka classifier exposed through fit / predict / predict_proba."""

    # Short name -> (Weka class name, option string or None).
    _CLASSIFIERS = {
        'wrf': ('weka.classifiers.trees.RandomForest', None),
        'wj48': ('weka.classifiers.trees.J48', None),
        'wnb': ('weka.classifiers.bayes.NaiveBayes', '-D'),
        'wbn': ('weka.classifiers.bayes.BayesNet',
                '-D -Q weka.classifiers.bayes.net.search.local.TAN -- -S BAYES -E weka.classifiers.bayes.net.estimate.SimpleEstimator -- -A 0.5'),
    }

    def __init__(self, classifier_name):
        """Create the Weka classifier selected by ``classifier_name``.

        :param classifier_name: one of 'wrf', 'wj48', 'wnb', 'wbn'
        :raises ValueError: if ``classifier_name`` is not one of those
        """
        if classifier_name not in self._CLASSIFIERS:
            raise ValueError(
                'unknown classifier name: %r (expected one of %s)'
                % (classifier_name, ', '.join(sorted(self._CLASSIFIERS))))
        class_name, options = self._CLASSIFIERS[classifier_name]
        if options is not None:
            # The configured classifier must be stored; it used to be built
            # and dropped, leaving 'wnb' and 'wbn' without self._classifier.
            self._classifier = Classifier(classname=class_name,
                                          options=options.split())
        else:
            self._classifier = Classifier(classname=class_name)

    def fit(self, training_set, ground_truth):
        """Train the classifier.

        :param training_set: feature matrix
        :param ground_truth: one label per row of ``training_set``
        :return: self
        """
        self.ground_truth = ground_truth
        training_set = self._sklearn2weka(training_set, self.ground_truth)
        training_set.class_is_last()
        self._classifier.build_classifier(training_set)
        return self

    def predict(self, testing_set):
        """Predict a label for every row of ``testing_set``.

        NOTE(review): the Weka test set is filled with the *training* labels
        row by row, so a test set with more rows than ``ground_truth`` looks
        like it raises IndexError in ``_sklearn2weka`` -- confirm.

        :param testing_set: feature matrix
        :return: numpy array of labels, mapped back through ``self._dict``
        """
        testing_set = self._sklearn2weka(testing_set, self.ground_truth)
        testing_set.class_is_last()
        preds = [self._classifier.classify_instance(inst)
                 for inst in testing_set]
        preds = np.vectorize(self._dict.get)(preds)
        return np.array(preds)

    def predict_proba(self, testing_set):
        """Return the class distribution for every row of ``testing_set``.

        :param testing_set: feature matrix
        :return: numpy array with one class distribution per row
        """
        testing_set = self._sklearn2weka(testing_set, self.ground_truth)
        testing_set.class_is_last()
        dists = [self._classifier.distribution_for_instance(inst)
                 for inst in testing_set]
        return np.array(dists)

    def set_oracle(self, oracle):
        """Accept and ignore test labels; this wrapper does not use them."""
        pass

    def _sklearn2weka(self, features, labels=None):
        """Convert a feature matrix plus labels into a Weka data set.

        A nominal attribute named 'tag' is appended after the last feature
        column and filled with the ordinal-encoded labels.

        :param features: 2-D numeric array
        :param labels: one label per row of ``features``
        :return: the Weka data set
        """
        labels_encoder = OrdinalEncoder()
        labels_nominal = labels_encoder.fit_transform(
            np.array(labels).reshape(-1, 1))

        # The mapping is rebuilt on every call that has labels. The old guard
        # tested hasattr(self, 'dict'), an attribute that is never set, so it
        # never skipped the rebuild either.
        if labels is not None:
            mapping = {}
            for label, nominal in zip(labels, labels_nominal):
                # Keep the first original label seen for each encoded value.
                mapping.setdefault(nominal.item(0), label)
            self._dict = mapping

        labels_column = np.reshape(labels_nominal,
                                   [labels_nominal.shape[0], 1])

        # np.float64 replaces the np.float_ alias, which NumPy 2.0 removed.
        weka_dataset = ndarray_to_instances(
            np.ascontiguousarray(features, dtype=np.float64), 'weka_dataset')
        weka_dataset.insert_attribute(
            Attribute.create_nominal(
                'tag', [str(float(i)) for i in range(len(self._dict))]),
            features.shape[1])

        if labels is not None:
            for index, inst in enumerate(weka_dataset):
                inst.set_value(features.shape[1], labels_column[index])
                weka_dataset.set_instance(index, inst)
        return weka_dataset