示例#1
0
def get_classifier(classifier, min_no):
    """
    Build the WEKA classifier selected by name and set its minimum-instances option
    :param classifier: string name of the classifier, matched case-insensitively
                       ('jrip', 'dec_tree' or 'part')
    :param min_no: Minimum number of instances correctly covered by the classifier;
                   passed as a string after "-N" for JRip and after "-M" for J48 / PART
    :return: classifier object with its options set
    :raises ValueError: if the name is not one of the supported classifiers
    """
    # Supported name -> (WEKA class name, option flag that carries min_no).
    supported = {
        'jrip': ("weka.classifiers.rules.JRip", "-N"),
        'dec_tree': ("weka.classifiers.trees.J48", "-M"),
        'part': ("weka.classifiers.rules.PART", "-M"),
    }

    key = classifier.lower()
    if key not in supported:
        raise ValueError(
            "Please enter the correct classifier name (jrip | dec_tree | part)"
        )

    weka_class, min_flag = supported[key]
    cls = Classifier(classname=weka_class)
    cls.options = [min_flag, str(min_no)]
    return cls
示例#2
0
def get_classifier(min_no, seed):
    """
    Create a JRip classifier with its "-N" and "-S" options set
    :param min_no: value passed (as a string) after the "-N" flag
    :param seed: value passed (as a string) after the "-S" flag
    :return: classifier object
    """
    # JRip flags -- -N: minNo, -F folds, -O num optimizations, -batch-size, -S: seed
    jrip = Classifier(classname="weka.classifiers.rules.JRip")
    jrip.options = ["-N", str(min_no), "-S", str(seed)]
    return jrip
示例#3
0
def get_classifier(min_no, seed):
    """
    Build a JRip classifier configured from the given option values
    :param min_no: Minimum number of instances correctly covered by JRIP
                   (sent as the value of the "-N" option)
    :param seed: Seed for randomizing instance order
                 (sent as the value of the "-S" option)
    :return: classifier object
    """
    option_pairs = (("-N", min_no), ("-S", seed))

    # Flatten the (flag, value) pairs into the flat string list WEKA expects.
    flat_options = []
    for flag, value in option_pairs:
        flat_options.extend((flag, str(value)))

    jrip = Classifier(classname="weka.classifiers.rules.JRip")
    jrip.options = flat_options
    return jrip
# Minimal python-weka-wrapper session: start the JVM, configure a J48
# classifier, print its options, then stop the JVM.
import weka.core.jvm as jvm
jvm.start()

# NOTE(review): the JVM is started three more times below with different
# arguments. These look like alternative invocations pasted together;
# presumably only one is intended -- confirm whether later calls have any
# effect once the JVM is already running.
jvm.start(system_cp=True, packages=True)
jvm.start(packages="/usr/local/lib/python2.7/dist-packages/weka")

jvm.start(max_heap_size="512m")

# NOTE(review): data_dir is not read anywhere in the visible snippet.
data_dir="CSDMC2010_SPAM/CSDMC2010_SPAM/TRAINING"


from  weka.classifiers  import  Classifier 
# Instantiate J48 and set its options to "-C 0.3", then echo them back.
cls = Classifier(classname="weka.classifiers.trees.J48")
cls.options = ["-C", "0.3"]
print(cls.options)


# Shut the JVM down at the end of the session.
jvm.stop()
# Evaluate a WEKA IBk classifier over the train/test splits yielded by `sss`,
# recording the predicted label of every test instance in Ypred and printing
# per-split and overall accuracy.
# NOTE(review): this snippet uses Python 2 print statements and relies on
# names defined outside the visible code (np, X, Y, data, classes, sss, itr,
# write_to_weka, Loader, Classifier).
Ypred = np.zeros(Y.shape, dtype='object')
print "Classification using K Nearest Neighbors"
for train_index, test_index in sss:
    print "Iter", itr,
    X_train, X_test = X[train_index], X[test_index]
    # Overwrite the last column of the test rows with classes[0] so the value
    # written to test.arff is a placeholder (presumably the last column holds
    # the class label -- confirm against write_to_weka).
    X_test[:,-1] = classes[0]       # make sure test classes is removed
    y_test = Y[test_index]
    # Dump both partitions to ARFF files so they can be loaded through WEKA.
    write_to_weka('train.arff', 'training_data', data.columns, X_train, classes)
    write_to_weka('test.arff', 'testing_data', data.columns, X_test, classes)

    loader = Loader(classname="weka.core.converters.ArffLoader")
    trdata = loader.load_file("train.arff")
    trdata.class_is_last()

    # A fresh IBk instance is built for every split. The option meanings are
    # defined by WEKA's IBk / LinearNNSearch (presumably 10 neighbours with
    # Manhattan distance over all attributes -- confirm against the WEKA docs).
    classifier = Classifier(classname="weka.classifiers.lazy.IBk")
    classifier.options = ["-K", "10", "-W", "0", "-I", "-A",
                          "weka.core.neighboursearch.LinearNNSearch -A \"weka.core.ManhattanDistance -R first-last\""]
    classifier.build_classifier(trdata)

    tedata = loader.load_file("test.arff")
    tedata.class_is_last()

    # classify_instance returns a numeric result that is used as an index into
    # `classes`; the label is stored at the instance's position in the full set.
    for index, inst in enumerate(tedata):
        result = classifier.classify_instance(inst)
        Ypred[test_index[index]] = classes[int(result)]

    # Fraction of this split's test labels that match the predictions.
    accuracy = float(np.sum(y_test == Ypred[test_index])) / float(y_test.shape[0])
    print " => Accuracy = ", accuracy
    itr += 1
# Fraction of all labels in Y that match the collected predictions.
accuracy = float(np.sum(Y == Ypred)) / float(Y.shape[0])
print "Total accuracy = ", accuracy
示例#6
0
def train(request):
    """
    Train a J48 tree on dataset.arff, persist it, and classify one sample.

    The view loads ``dataset.arff`` from the project's dataset directory,
    builds a J48 classifier with options ``-C 0.3``, writes it to
    ``out.model``, reads it back, and classifies a single hard-coded
    instance whose class value is missing.

    NOTE(review): the JVM is started and stopped inside this view. The
    python-weka-wrapper documentation reportedly states that a stopped JVM
    cannot be restarted in the same process -- confirm; if so, this view
    works only once per worker process.

    :param request: the incoming request object (not read by this view)
    :return: HttpResponse whose body is the predicted class label
    """
    # Names of the 47 numeric attributes, in the order the sample values
    # below are listed. The strings are kept exactly as in the original code.
    feature_names = (
        "bodydearword.feature",
        "bodyform.feature",
        "bodyhtml.feature",
        "bodymultipart.feature",
        "bodynumchars.feature",
        "bodynumfunctionwords.feature",
        "bodynumuniqwords.feature",
        "bodynumwords.feature",
        "bodyrichness.feature",
        "bodysuspensionword.feature",
        "bodyverifyyouraccountphrase.feature",
        "externalsabinary.feature",
        "externalsascore.feature",
        "scriptjavascript.feature",
        "scriptonclick.feature",
        "scriptpopup.feature",
        "scriptstatuschange.feature",
        "scriptunmodalload.feature",
        "senddiffreplyto.feature",
        "sendnumwords.feature",
        "sendunmodaldomain.feature",
        "subjectbankword.feature",
        "subjectdebitword.feature",
        "subjectfwdword.feature",
        "subjectnumchars.feature",
        "subjectnumwords.feature",
        "subjectreplyword.feature",
        "subjectrichness.feature",
        "subjectverifyword.feature",
        "urlatchar.feature",
        "urlbaglink.feature",
        "urlip.feature",
        "urlnumdomains.feature",
        "urlnumexternallink.feature",
        "urlnumimagelink.feature",
        "urlnuminternallink.feature",
        "urlnumip.feature",
        "urlnumlink.feature",
        "urlnumperiods.feature",
        "urlnumport.feature",
        "urlport.feature",
        "urltwodoains.feature",
        "urlunmodalbaglink.feature",
        "urlwordclicklink.feature",
        "urlwordherelink.feature",
        "urlwordloginlink.feature",
        "urlwordupdatelink.feature",
    )

    var = ''

    jvm.start()
    try:
        attributes = [Attribute.create_numeric(name) for name in feature_names]
        # Labels are passed as a list: a set literal has no guaranteed
        # iteration order, so the label order could differ between runs.
        attributes.append(Attribute.create_nominal("class", ["phish", "ham"]))

        data_dir = settings.BASE_DIR + "/phishing/public/datasets/"

        loader = Loader(classname="weka.core.converters.ArffLoader")
        data = loader.load_file(data_dir + "dataset.arff")
        data.class_is_last()
        cls = Classifier(classname="weka.classifiers.trees.J48")
        cls.options = ["-C", "0.3"]
        cls.build_classifier(data)

        # Round-trip the trained model through disk and classify with the
        # deserialized copy.
        serialization.write(data_dir + "out.model", cls)
        classifier = Classifier(jobject=serialization.read(data_dir + "out.model"))

        dataset = Instances.create_instances("test", attributes, 0)
        values = [
            0, 0, 0, 0, 890, 1, 124, 198, 0.22247191011236, 0, 0, 0, 0.0, 0, 0, 0,
            0, 0, 1, 4, 0, 0, 0, 0, 21, 4, 1, 0.19047619047619, 0, 0, 0, 0, 2, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            Instance.missing_value()
        ]
        inst = Instance.create_instance(values)
        dataset.add_instance(inst)
        dataset.class_is_last()

        # The predicted index refers to the label order the model was trained
        # with, so decode it through the training data's class attribute
        # rather than the hand-built test header.
        trained_class_attribute = data.class_attribute
        for inst1 in dataset:
            pred = classifier.classify_instance(inst1)
            var = trained_class_attribute.value(int(pred))
            if var == 'ham':
                print('No es pishing')
            else:
                print('Es pishing')

            print(var)
    finally:
        # Stop the JVM even when loading, training or classification fails.
        jvm.stop()

    return HttpResponse(str(var))
# Prepare a RandomForest cross-validation run: load the ARFF file, drop two
# attributes, configure the classifier, and build a shuffled copy of the data
# for the fold loop that follows.
# NOTE(review): `loader`, `arffFileName`, `Random`, `Instances`, `Evaluation`
# and `Classifier` are defined outside the visible snippet.
data = loader.load_file(arffFileName)
data.class_is_last()
# NOTE(review): the two deletions run one after the other, so the second
# index is presumably interpreted after the first removal has shifted the
# remaining attributes -- confirm that index 1 really is the destination IP
# at that point.
data.delete_attribute(0)  #delete source IP attribute
data.delete_attribute(1)  #delete destination IP attribute

#nominalAttr = Attribute.create_nominal("class", "{yes, no}")
#data.delete_last_attribute()
#data.insert_attribute(nominalAttr, 0)
#data.class_is_first()

#print(data)

# Option meanings are defined by WEKA's RandomForest; see its documentation.
classifier = Classifier(classname="weka.classifiers.trees.RandomForest")
classifier.options = [
    '-P', '100', '-I', '100', '-num-slots', '1', '-K', '0', '-M', '1.0', '-V',
    '0.001', '-S', '1'
]

folds = 10
seed = 1
rnd = Random(seed)
# Shuffle a copy so the loaded `data` object keeps its original order.
rand_data = Instances.copy_instances(data)
rand_data.randomize(rnd)
# Stratify only when the class attribute is nominal.
if rand_data.class_attribute.is_nominal:
    rand_data.stratify(folds)

# Initial values for names that are presumably updated inside the fold loop
# below (the loop body is outside the visible snippet).
progress = 0

predicted_data = None
evaluation = Evaluation(rand_data)
for i in range(folds):