Python RIPPER示例，wittgenstein.RIPPER Python示例

示例#1

0

显示文件

    def define_ruleset_model(self, model_name, status):
        """Fit a RIPPER ruleset on the predictions of a saved model and save it.

        Loads the pickled model stored at ``models/model_<model_name>.pkl``
        (the original notes mention GBT or SVM models), predicts labels for
        ``self.X_transformed`` with it, and trains a RIPPER classifier on
        those predicted labels. The learned ruleset is printed and the fitted
        classifier is pickled to
        ``models/ruleset_<model_name>_model_<status>.pkl`` for reuse.

        Args:
            model_name: Name of the saved model; it is lower-cased when
                building both file names.
            status: Value passed to RIPPER as ``pos_class``; also embedded
                (via ``str``) in the output file name.
        """
        model_key = model_name.lower()

        # NOTE(review): unpickling executes arbitrary code -- only load model
        # files that come from a trusted source.
        # The context manager guarantees the handle is closed after loading.
        with open('models/model_' + model_key + '.pkl', 'rb') as model_file:
            model = pickle.load(model_file)

        # The saved model's predictions become the training target.
        y_predicted = model.predict(self.X_transformed)

        clf = lw.RIPPER()
        clf.fit(self.X_transformed,
                y_predicted,
                pos_class=status,
                random_state=42)

        print(clf.ruleset_.out_pretty())

        ruleset_path = ('models/ruleset_' + model_key + '_model_' +
                        str(status) + '.pkl')
        with open(ruleset_path, 'wb') as ruleset_file:
            pickle.dump(clf, ruleset_file)

示例#2

0

显示文件

文件： rule_based_classifier.py 项目： Raunak005/Biased-Prediction-A-Comparative-Analysis

def rule_based_classifier(training_data):
    """Train a RIPPER rule-based classifier, print its rules, and save it.

    The features and target are extracted from ``training_data`` through the
    ``util`` helpers, split with ``train_test_split`` and only the training
    part is used to fit the classifier. The fitted model is handed to
    ``util.save_data_model`` under the name ``'rule_based_classifier'``.

    Args:
        training_data: Data set passed to ``util.drop_target_variable`` and
            ``util.retrieve_target_variable``.
    """
    print('Generating the data model for a rule based classifier . . .\n')

    features = util.drop_target_variable(training_data)
    target = util.retrieve_target_variable(training_data)

    # Only the training halves of the split are used in this function.
    # NOTE(review): test_size=0.7 leaves 30% of the rows for training --
    # confirm this is intended.
    X_train, _, y_train, _ = train_test_split(
        features, target, test_size=0.7, random_state=1)

    ripper = lw.RIPPER()
    ripper.fit(X_train, y_train)
    print(ripper.ruleset_.out_pretty())

    print(
        'The data model for rule based classifier has been generated successfully!\n'
    )
    util.save_data_model(ripper, 'rule_based_classifier')

示例#3

0

显示文件

文件： Code.py 项目： srishti-chaudhary/SpamClassification-SVM

def RIPPER(FeatureMatrix, Labels):
    """Train a RIPPER classifier on a random split and predict the held-out part.

    Args:
        FeatureMatrix: Sparse matrix of features; converted to a DataFrame
            with ``pandas.DataFrame.sparse.from_spmatrix``.
        Labels: Labels aligned with the rows of ``FeatureMatrix``.

    Returns:
        A tuple ``(expected, predicted)`` holding the held-out labels and the
        classifier's predictions for the held-out rows.
    """
    feature_frame = pandas.DataFrame.sparse.from_spmatrix(FeatureMatrix)

    # No random_state is given, so the split differs between runs.
    train_x, test_x, train_y, test_y = train_test_split(
        feature_frame, Labels, test_size=0.1)

    # Fit the rule learner with '1' as the positive class.
    model = wittgenstein.RIPPER()
    model.fit(train_x, train_y, class_feat=None, pos_class='1')

    # Pair the true held-out labels with the model's predictions.
    return (test_y, model.predict(test_x))

示例#4

0

显示文件

文件： ch05_part2.py 项目： Umreen24/cs5310_LabCh5

# Target vector for the split below.
y = sub1_state_labels

# Hold out part of the data (test_size=0.2), stratified by the state labels,
# with a fixed seed.
sub1_X_train, sub1_X_test, sub1_y_train, sub1_y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=sub1_state_labels
)

"""
Q10 - Train training dataset using "RIPPER" model.
"""
# One RIPPER model per brain state, each trained with that state as the
# positive class; every model gets its own random_state value.
pre_data_model = lw.RIPPER(random_state=42)
med_data_model = lw.RIPPER(random_state=36)
post_data_model = lw.RIPPER(random_state=28)

# Fit the 'Pre' model.
pre_data_model.fit(sub1_X_train, sub1_y_train, pos_class='Pre')

# Fit the 'Med' model.
med_data_model.fit(sub1_X_train, sub1_y_train, pos_class='Med')

示例#5

0

显示文件

文件： RIPPER.py 项目： iardatuna/Data-Mining-Classification-and-Prediction

# Split the RIPPER data set into train and test parts with a fixed seed.
ripper_train, ripper_test = train_test_split(
    ripper_dataset, test_size=0.2, random_state=123
)

# Report the shapes of the decision-tree and RIPPER splits, separated by
# blank lines.
print()
print("Train size for Decision Tree" + " ------------>  " + str(x_train.shape))
print()
print("Test size for Decision Tree" + "  ------------>  " + str(x_test.shape))
print()
print()
print("Train size for Ripper" + " ------------>  " + str(ripper_train.shape))
print()
print("Test size for Ripper" + "  ------------>  " + str(ripper_test.shape))
print()

# Part e: fit RIPPER on the training frame and predict the test frame.
# The measured run time covers both the fit and the predict call.
ripper_clf = lw.RIPPER()
ripper_start_time = time.time()
ripper_clf.fit(ripper_train, class_feat="target", random_state=123)
ripper_predict = ripper_clf.predict(ripper_test)
ripper_run_time = time.time() - ripper_start_time

# Part f: decision tree using the entropy criterion.
print("Decision Tree with Entropy")
print()
entropy_data = DecisionTreeClassifier(
    criterion="entropy", random_state=123, max_depth=5
)
entropy_start_time = time.time()
entropy_data = entropy_data.fit(x_train, y_train)
entropy_pred_data = entropy_data.predict(x_test)

示例#6

0

显示文件

文件： Binary_classification_methods.py 项目： nicolasgapa/ML-methods-for-binary-classification

# Score every classifier on each dataset.
# -------------------- #
for dataset in datasets:

    # Look up the first global variable name bound to this dataset object
    # so it can be shown in the output.
    dataset_name = [n for n in globals() if globals()[n] is dataset][0]
    print('Dataset: ', dataset_name)

    # Load the CSV; the 'class' column is the target, the rest are features.
    dataset = pd.read_csv(dataset)
    y = dataset['class']
    X = dataset.drop('class', axis=1)
    print('Size: ', X.shape[0])
    print('-------------------')

    # Each compute_scores call yields a pair that is reported below as the
    # CV score and its spread.
    m1, s1 = compute_scores(
        DecisionTreeClassifier(criterion="entropy", splitter='random'), X, y)
    m2, s2 = compute_scores(lw.RIPPER(), X, y)  # alternative: lw.IREP()
    m3, s3 = compute_scores(KNeighborsClassifier(n_neighbors=5), X, y)
    m4, s4 = compute_scores(GaussianNB(), X, y)
    m5, s5 = compute_scores(svm.SVC(C=1), X, y)
    m6, s6 = compute_scores(
        AdaBoostClassifier(n_estimators=100, random_state=0), X, y)
    ms = [m1, m2, m3, m4, m5, m6]
    ss = [s1, s2, s3, s4, s5, s6]

    # Tabulate the results as percentages, one row per classifier.
    results = pd.DataFrame(
        data={
            'CV score': [round(100*i, 2) for i in ms],
            '+-2std': [round(2*100*i, 2) for i in ss],
        },
        index=[
            'Decision-tree',
            'Rule-based',
            'K-neighbours',
            'Naive Bayes',
            'Support Vector Machine',
            'Adaboost',
        ],
    )
    print(results)

示例#7

0

显示文件

文件： Exercise2.py 项目： kturk/Data-Mining-Assignments

    # Exercise D #

    # Train-Test split part.
    trainData, testData, trainTarget, testTarget = train_test_split(sub_df.drop(['target'], axis=1), target,
                                                                    test_size=0.2, random_state=0)

    # Exercise E #

    # Taking copy of Train-Test to not mess with original data.
    ripperTrainData    = trainData.copy()
    ripperTestData     = testData.copy()
    ripperTrainTarget  = trainTarget.copy()
    ripperTestTarget   = testTarget.copy()

    ripper = lw.RIPPER()                                              # Ripper creation.

    ripperStartTime  = time()                                         # Start time of fit process with Ripper.
    ripper.fit(ripperTrainData, ripperTrainTarget)                    # Ripper's fit process.
    ripperEndTime    = time()                                         # End time of fit process with Ripper.
    ripperScore      = ripper.score(ripperTestData, ripperTestTarget) # Ripper score calculation.


    print("\nElapsed time for ripper algorithm is:", ripperEndTime - ripperStartTime)
    print("Accuracy of Ripper algorithm is:", ripperScore)

    # Exercise F #

    # Taking copy of Train-Test to not mess with original data.
    treeTrainData   = trainData.copy()
    treeTestData    = testData.copy()