def get_classifier(classifier, min_no):
    """
    Build a Weka classifier selected by name and configured with ``min_no``.

    :param classifier: name of the classifier to use, one of ``jrip``,
        ``dec_tree`` or ``part`` (compared case-insensitively)
    :param min_no: minimum number of instances correctly covered by the
        classifier; sent as ``-N`` for JRip and as ``-M`` for J48 / PART
    :return: classifier object with its options set
    :raises ValueError: if ``classifier`` is not one of the supported names
    """
    # Supported name -> (Weka class name, flag that carries ``min_no``).
    supported = {
        'jrip': ("weka.classifiers.rules.JRip", "-N"),
        'dec_tree': ("weka.classifiers.trees.J48", "-M"),
        'part': ("weka.classifiers.rules.PART", "-M"),
    }

    wanted = classifier.lower()
    if wanted not in supported:
        raise ValueError(
            "Please enter the correct classifier name (jrip | dec_tree | part)"
        )

    weka_class, min_flag = supported[wanted]
    cls = Classifier(classname=weka_class)
    cls.options = [min_flag, str(min_no)]
    return cls
def get_classifier(min_no, seed):
    """
    Create a JRip classifier configured with a minimum count and a seed.

    :param min_no: value sent to JRip with the ``-N`` flag
    :param seed: value sent to JRip with the ``-S`` flag
    :return: classifier object with its options set
    """
    jrip = Classifier(classname="weka.classifiers.rules.JRip")
    # Only -N (minNo) and -S (seed) are set here; the other JRip switches
    # (-F folds, -O num optimizations, -batch-size) are left untouched.
    jrip.options = ["-N", str(min_no), "-S", str(seed)]
    return jrip
def get_classifier(min_no, seed):
    """
    Build the JRip classifier used with the given options.

    :param min_no: Minimum number of instances correctly covered by JRIP
        (passed with ``-N``)
    :param seed: Seed for randomizing instance order (passed with ``-S``)
    :return: classifier object
    """
    ripper = Classifier(classname="weka.classifiers.rules.JRip")

    # Flatten (flag, value) pairs into the flat string list Weka expects.
    flat_options = []
    for flag, value in (("-N", min_no), ("-S", seed)):
        flat_options.extend((flag, str(value)))

    ripper.options = flat_options
    return ripper
import weka.core.jvm as jvm
from weka.classifiers import Classifier

data_dir = "CSDMC2010_SPAM/CSDMC2010_SPAM/TRAINING"

# Start the JVM exactly once. The original script followed this call with
# three more jvm.start(...) variants:
#     jvm.start(system_cp=True, packages=True)
#     jvm.start(packages="/usr/local/lib/python2.7/dist-packages/weka")
#     jvm.start(max_heap_size="512m")
# NOTE(review): per the python-weka-wrapper jvm module, start() is ignored
# while a JVM is already running, so only the first call ever took effect.
# If one of the variants above is the configuration actually wanted, use it
# *instead of* the plain call below -- confirm which one is intended.
jvm.start()
try:
    # Configure a J48 tree with "-C 0.3" and echo the options back.
    cls = Classifier(classname="weka.classifiers.trees.J48")
    cls.options = ["-C", "0.3"]
    print(cls.options)
finally:
    # Always shut the JVM down, even if building the classifier fails.
    jvm.stop()
# Evaluate Weka's IBk classifier fold by fold and report per-fold and overall
# accuracy. Relies on names defined outside this fragment: X, Y, classes,
# data, sss, itr, write_to_weka, Loader, Classifier and np.
# NOTE(review): sss is assumed to yield (train_index, test_index) index
# arrays and itr to be an integer fold counter initialised earlier -- confirm.

# One predicted label per sample; filled in as each fold is classified.
Ypred = np.zeros(Y.shape, dtype='object')
print "Classification using K Nearest Neighbors"
for train_index, test_index in sss:
    # Trailing comma (Python 2): no newline, so the accuracy printed further
    # down lands on the same output line.
    print "Iter", itr,
    X_train, X_test = X[train_index], X[test_index]
    # Overwrite the last column of the test rows with a fixed label so the
    # true class is not written to test.arff.
    X_test[:,-1] = classes[0] # make sure test classes is removed
    y_test = Y[test_index]
    # Dump both splits to ARFF files in the working directory; they are
    # overwritten on every iteration.
    write_to_weka('train.arff', 'training_data', data.columns, X_train, classes)
    write_to_weka('test.arff', 'testing_data', data.columns, X_test, classes)
    loader = Loader(classname="weka.core.converters.ArffLoader")
    trdata = loader.load_file("train.arff")
    trdata.class_is_last()
    # A fresh IBk classifier is built for every fold.
    # NOTE(review): option meanings (-K neighbours, -I distance weighting,
    # -A search algorithm with Manhattan distance) are taken from the Weka
    # IBk documentation, not from this file -- confirm before changing them.
    classifier = Classifier(classname="weka.classifiers.lazy.IBk")
    classifier.options = ["-K", "10", "-W", "0", "-I", "-A", "weka.core.neighboursearch.LinearNNSearch -A \"weka.core.ManhattanDistance -R first-last\""]
    classifier.build_classifier(trdata)
    tedata = loader.load_file("test.arff")
    tedata.class_is_last()
    for index, inst in enumerate(tedata):
        # classify_instance returns a numeric result that is used as an index
        # into classes; store the label at the sample's original position.
        result = classifier.classify_instance(inst)
        Ypred[test_index[index]] = classes[int(result)]
    # Fraction of this fold's test samples whose prediction matches y_test.
    accuracy = float(np.sum(y_test == Ypred[test_index])) / float(y_test.shape[0])
    print " => Accuracy = ", accuracy
    itr += 1
# Overall accuracy across all samples of Y.
# NOTE(review): samples never selected by any test_index keep the initial 0
# in Ypred and would count as misses -- confirm the splits cover every sample.
accuracy = float(np.sum(Y == Ypred)) / float(Y.shape[0])
print "Total accuracy = ", accuracy
# Names of the 47 numeric attributes of the ad-hoc test dataset, in the order
# they are added to it. Spelling is kept exactly as in the original code.
# NOTE(review): presumably this mirrors the column order of dataset.arff --
# confirm against the training file.
_PHISHING_FEATURE_NAMES = (
    "bodydearword.feature",
    "bodyform.feature",
    "bodyhtml.feature",
    "bodymultipart.feature",
    "bodynumchars.feature",
    "bodynumfunctionwords.feature",
    "bodynumuniqwords.feature",
    "bodynumwords.feature",
    "bodyrichness.feature",
    "bodysuspensionword.feature",
    "bodyverifyyouraccountphrase.feature",
    "externalsabinary.feature",
    "externalsascore.feature",
    "scriptjavascript.feature",
    "scriptonclick.feature",
    "scriptpopup.feature",
    "scriptstatuschange.feature",
    "scriptunmodalload.feature",
    "senddiffreplyto.feature",
    "sendnumwords.feature",
    "sendunmodaldomain.feature",
    "subjectbankword.feature",
    "subjectdebitword.feature",
    "subjectfwdword.feature",
    "subjectnumchars.feature",
    "subjectnumwords.feature",
    "subjectreplyword.feature",
    "subjectrichness.feature",
    "subjectverifyword.feature",
    "urlatchar.feature",
    "urlbaglink.feature",
    "urlip.feature",
    "urlnumdomains.feature",
    "urlnumexternallink.feature",
    "urlnumimagelink.feature",
    "urlnuminternallink.feature",
    "urlnumip.feature",
    "urlnumlink.feature",
    "urlnumperiods.feature",
    "urlnumport.feature",
    "urlport.feature",
    "urltwodoains.feature",
    "urlunmodalbaglink.feature",
    "urlwordclicklink.feature",
    "urlwordherelink.feature",
    "urlwordloginlink.feature",
    "urlwordupdatelink.feature",
)


def train(request):
    """
    Train a J48 model on ``dataset.arff`` and classify one hard-coded sample.

    The function starts the JVM, trains J48 (``-C 0.3``) on the ARFF file
    under ``settings.BASE_DIR + "/phishing/public/datasets/"``, writes the
    model to ``out.model`` in the same directory, reads it back, classifies a
    single built-in feature vector and prints the outcome.

    :param request: incoming request object; it is not inspected
    :return: ``HttpResponse`` whose body is the predicted class label
        (an empty string if no instance was classified)
    """
    jvm.start()
    try:
        # Header of the one-row dataset that carries the sample to classify.
        attributes = [
            Attribute.create_numeric(name) for name in _PHISHING_FEATURE_NAMES
        ]
        # A list, not a set literal: set iteration order is arbitrary, which
        # made the label order of this attribute unpredictable.
        attributes.append(Attribute.create_nominal("class", ['phish', 'ham']))

        data_dir = settings.BASE_DIR + "/phishing/public/datasets/"
        loader = Loader(classname="weka.core.converters.ArffLoader")
        data = loader.load_file(data_dir + "dataset.arff")
        data.class_is_last()

        cls = Classifier(classname="weka.classifiers.trees.J48")
        cls.options = ["-C", "0.3"]
        cls.build_classifier(data)

        # Persist the model, then classify with the copy read back from disk.
        serialization.write(data_dir + "out.model", cls)
        classifier = Classifier(jobject=serialization.read(data_dir + "out.model"))

        dataset = Instances.create_instances("test", attributes, 0)
        # 47 feature values followed by a missing class value.
        values = [
            0, 0, 0, 0, 890, 1, 124, 198, 0.22247191011236, 0,
            0, 0, 0.0, 0, 0, 0, 0, 0, 1, 4,
            0, 0, 0, 0, 21, 4, 1, 0.19047619047619, 0, 0,
            0, 0, 2, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0,
            Instance.missing_value()
        ]
        inst = Instance.create_instance(values)
        dataset.add_instance(inst)
        dataset.class_is_last()

        # The predicted index refers to the class labels the model was
        # trained with, so decode it against the training data's class
        # attribute rather than the ad-hoc header built above.
        trained_class = data.class_attribute
        var = ''
        for inst1 in dataset:
            pred = classifier.classify_instance(inst1)
            var = trained_class.value(int(pred))
            if var == 'ham':
                print('No es pishing')
            else:
                print('Es pishing')
        print(var)
    finally:
        # Stop the JVM even when loading, training or classifying fails.
        # NOTE(review): python-weka-wrapper documents that a stopped JVM
        # cannot be started again in the same process, so a second call to
        # this function would likely fail -- confirm how the hosting process
        # is meant to manage the JVM lifecycle.
        jvm.stop()
    return HttpResponse(str(var))
data = loader.load_file(arffFileName) data.class_is_last() data.delete_attribute(0) #delete source IP attribute data.delete_attribute(1) #delete destination IP attribute #nominalAttr = Attribute.create_nominal("class", "{yes, no}") #data.delete_last_attribute() #data.insert_attribute(nominalAttr, 0) #data.class_is_first() #print(data) classifier = Classifier(classname="weka.classifiers.trees.RandomForest") classifier.options = [ '-P', '100', '-I', '100', '-num-slots', '1', '-K', '0', '-M', '1.0', '-V', '0.001', '-S', '1' ] folds = 10 seed = 1 rnd = Random(seed) rand_data = Instances.copy_instances(data) rand_data.randomize(rnd) if rand_data.class_attribute.is_nominal: rand_data.stratify(folds) progress = 0 predicted_data = None evaluation = Evaluation(rand_data) for i in range(folds):