Пример #1
0
def rakeld_ensemble(vec, label):
    """Fit a RakelD ensemble on ``vec`` / ``label`` and return it.

    A LabelPowerset transformer wrapping a LinearSVC is handed to RakelD as
    its ``classifier`` argument, and the ensemble uses ``labelset_size=5``.
    """
    powerset = LabelPowerset(
        classifier=LinearSVC(),
        require_dense=[False, True],
    )
    ensemble = RakelD(classifier=powerset, labelset_size=5)
    ensemble.fit(vec, label)
    return ensemble
def RAkELd(dataset_train_x, dataset_train_y, dataset_test_x, dataset_test_y,
           base_clasif, num_labels):
    """Train RakelD on the training split and report metrics on the test split.

    ``base_clasif`` becomes RakelD's base classifier and ``num_labels`` its
    ``labelset_size``. The test predictions are passed, together with
    ``dataset_test_y``, to ``Metrics_Accuracy`` under the label "RAkELd".
    Nothing is returned.
    """
    model = RakelD(base_classifier=base_clasif, labelset_size=num_labels)
    model.fit(dataset_train_x, dataset_train_y)

    predicted = model.predict(dataset_test_x)
    Metrics_Accuracy("RAkELd", predicted, dataset_test_y)
def build_Rake(X_train, y_train, X_test, y_test):
    """Fit a GaussianNB-based RakelD model and print its test accuracy.

    The model is built with ``labelset_size=4`` and
    ``base_classifier_require_dense=[True, True]``. Nothing is returned; the
    accuracy on ``X_test`` / ``y_test`` is only printed.
    """
    rakel = RakelD(
        base_classifier=GaussianNB(),
        base_classifier_require_dense=[True, True],
        labelset_size=4,
    )
    rakel.fit(X_train, y_train)

    y_hat = rakel.predict(X_test)
    score = accuracy_score(y_test, y_hat)
    print('Test accuracy is {}'.format(score))
def RAkELd(dataset_train_x, dataset_train_y, dataset_test_x, dataset_test_y,
           base_clasif, num_labels):
    """Train RakelD, report test metrics and print how long fitting took.

    ``base_clasif`` becomes RakelD's base classifier and ``num_labels`` its
    ``labelset_size``. Only the ``fit`` call is timed (wall-clock, via
    ``time.time``); prediction happens after the timer is stopped.
    Nothing is returned.
    """
    model = RakelD(base_classifier=base_clasif, labelset_size=num_labels)

    fit_began = time.time()
    model.fit(dataset_train_x, dataset_train_y)
    fit_ended = time.time()
    elapsed = fit_ended - fit_began

    predicted = model.predict(dataset_test_x)
    Metrics_Accuracy("RAkELd", predicted, dataset_test_y)
    print("Execution time: {}s".format(elapsed))
Пример #5
0
def run(classifier, train_test_set):
    """Fit RakelD around ``classifier`` and predict the held-out split.

    ``train_test_set`` must unpack into exactly
    ``(X_train, X_test, y_train, y_test)``.

    Returns the tuple ``(y_test, y_pred)``.
    """
    X_train, X_test, y_train, y_test = train_test_set

    # Build the ensemble with RakelD's remaining arguments left at their
    # defaults, then train it.
    model = RakelD(base_classifier=classifier)
    model.fit(X_train, y_train)

    # Predict first, then announce which model produced the predictions.
    y_pred = model.predict(X_test)
    banner = '\n--------Rakel with {:}'.format(model)
    print(banner)

    return y_test, y_pred
def GridSearchCV_base(classif, dataset_train_x, dataset_train_y):
    """Grid-search RakelD's base classifier and return the best parameters.

    Three candidate grids are searched:

    * GaussianNB with no further parameters,
    * MultinomialNB with ``alpha`` taken from 0.1, 0.2, ..., 1.0,
    * SVC with the 'rbf', 'linear' and 'sigmoid' kernels.

    Candidates are scored with ``metrics.hamming_loss`` wrapped by
    ``make_scorer(..., greater_is_better=False)``.

    NOTE: ``classif`` is accepted but never used; the search always runs on
    a freshly constructed ``RakelD()``.

    Returns the ``best_params_`` attribute of the fitted GridSearchCV.
    """
    alphas = [round(x * 0.1, 1) for x in range(1, 11)]

    gaussian_grid = {
        'base_classifier': [GaussianNB()],
    }
    multinomial_grid = {
        'base_classifier': [MultinomialNB()],
        # Additive smoothing parameter of the naive Bayes model.
        'base_classifier__alpha': alphas,
    }
    svc_grid = {
        'base_classifier': [SVC()],
        'base_classifier__kernel': ['rbf', 'linear', 'sigmoid'],
    }
    candidate_grids = [gaussian_grid, multinomial_grid, svc_grid]

    hamming_scorer = make_scorer(metrics.hamming_loss,
                                 greater_is_better=False)
    search = GridSearchCV(RakelD(),
                          candidate_grids,
                          scoring=hamming_scorer,
                          n_jobs=3)
    search.fit(dataset_train_x, dataset_train_y)
    return search.best_params_
Пример #7
0
def RAkEL_fit(clfs, steps, X_train, y_train, X_test, y_test):
    """Evaluate RakelD for every base classifier and every labelset size.

    ``clfs`` maps a display name to a base classifier; ``steps`` is the
    iterable of ``labelset_size`` values to try. For each (classifier, size)
    pair a RakelD model is fitted on the training split and scored on the
    test split.

    Returns a dict mapping each name to six parallel lists, in this order:
    ``[accuracy, hamming loss, micro F1, macro F1, micro precision,
    macro precision]``; each list has one entry per value in ``steps``.
    """
    results = {}
    for name, base in clfs.items():
        accuracy, hamming = [], []
        f1_mi, f1_ma = [], []
        precision_mi, precision_ma = [], []

        print('Fitting RAkEL with Base Classifier: %s' % name)
        for size in steps:
            model = RakelD(base_classifier=base, labelset_size=size)
            model.fit(X_train, y_train)
            predicted = model.predict(X_test)

            accuracy.append(accuracy_score(y_test, predicted))
            precision_mi.append(
                precision_score(y_test, predicted, average='micro'))
            precision_ma.append(
                precision_score(y_test, predicted, average='macro'))
            hamming.append(hamming_loss(y_test, predicted))
            f1_mi.append(f1_score(y_test, predicted, average='micro'))
            f1_ma.append(f1_score(y_test, predicted, average='macro'))

        results[name] = [
            accuracy, hamming, f1_mi, f1_ma, precision_mi, precision_ma,
        ]

    return results
def rakel_model(X_train, X_test, y_train, y_test, labels, seed):
    """Train and time a CountVectorizer -> TF-IDF -> RakelD(LinearSVC) pipeline.

    The LinearSVC base classifier uses ``C=1``, balanced class weights and
    ``random_state=seed``; RakelD uses ``labelset_size=3``. ``labels`` is
    accepted but not used.

    Returns ``(f1, accuracy, seconds)`` where ``f1`` is the per-label F1
    (``average=None``), ``accuracy`` is actually the per-label Jaccard score
    (``average=None``), and ``seconds`` is the wall-clock time spent on
    fitting plus predicting.
    """
    steps = [
        ('count_vectorizer', CountVectorizer()),
        ('tf-idf_log', TfidfTransformer(sublinear_tf=True)),
        ('rakel',
         RakelD(
             base_classifier=LinearSVC(
                 C=1,
                 class_weight='balanced',
                 random_state=seed,
             ),
             base_classifier_require_dense=[True, True],
             labelset_size=3,
         )),
    ]
    model = Pipeline(steps)

    # Time training and prediction together.
    began = time.time()
    model.fit(X_train, y_train)
    predicted = model.predict(X_test)
    ended = time.time()

    # Per-label scores.
    f1 = f1_score(y_test, predicted, average=None)
    accuracy = jaccard_score(y_test, predicted, average=None)

    return f1, accuracy, ended - began
def Util_ClassifierMethods(dataset_train_x, dataset_train_y, dataset_test_x,
                           dataset_test_y):
    """Run every multi-label method in sequence on one train/test split.

    For each method the function prints a section title through
    ``Util_Title``, tunes hyper-parameters with the matching ``Find*`` /
    ``GridSearchCV_baseRakel`` helper where one is used, and then calls the
    method's runner with the train/test data and the chosen settings.

    The runners called with dataset arguments (``BinaryRelevance``,
    ``ClassifierChain``, ``LabelPowerset``, ``MLkNN``, ...) are presumably
    functions defined elsewhere in this file, distinct from the library
    classes reached through ``skpt`` / ``skadapt`` -- TODO confirm.

    Nothing is returned.
    """
    # Binary Relevance: plain GaussianNB first, then SVC and MultinomialNB
    # configured from the tuning helpers' results.
    Util_Title("Binary Relevance")
    base_classif = GaussianNB()
    BinaryRelevance(dataset_train_x, dataset_train_y, dataset_test_x,
                    dataset_test_y, base_classif, "GaussianNB")

    dict_res = FindBestSVCParams(skpt.BinaryRelevance(), dataset_train_x,
                                 dataset_train_y)
    base_classif = SVC(kernel=dict_res['classifier__kernel'],
                       degree=dict_res['classifier__degree'])
    BinaryRelevance(dataset_train_x, dataset_train_y, dataset_test_x,
                    dataset_test_y, base_classif, "SVC tuned")

    dict_res = FindBestMNBParams(skpt.BinaryRelevance(), dataset_train_x,
                                 dataset_train_y)
    base_classif = MultinomialNB(alpha=dict_res['classifier__alpha'])
    BinaryRelevance(dataset_train_x, dataset_train_y, dataset_test_x,
                    dataset_test_y, base_classif, "MNB tuned")

    # Classifier Chain: same three base classifiers as above.
    Util_Title("Classifier Chain")
    base_classif = GaussianNB()
    ClassifierChain(dataset_train_x, dataset_train_y, dataset_test_x,
                    dataset_test_y, base_classif, "GaussianNB")

    dict_res = FindBestSVCParams(skpt.ClassifierChain(), dataset_train_x,
                                 dataset_train_y)
    base_classif = SVC(kernel=dict_res['classifier__kernel'],
                       degree=dict_res['classifier__degree'])
    ClassifierChain(dataset_train_x, dataset_train_y, dataset_test_x,
                    dataset_test_y, base_classif, "SVC tuned")

    dict_res = FindBestMNBParams(skpt.ClassifierChain(), dataset_train_x,
                                 dataset_train_y)
    base_classif = MultinomialNB(alpha=dict_res['classifier__alpha'])
    ClassifierChain(dataset_train_x, dataset_train_y, dataset_test_x,
                    dataset_test_y, base_classif, "MNB tuned")

    # Label Powerset: same three base classifiers as above.
    Util_Title("Label Powerset")
    base_classif = GaussianNB()
    LabelPowerset(dataset_train_x, dataset_train_y, dataset_test_x,
                  dataset_test_y, base_classif, "GaussianNB")

    dict_res = FindBestSVCParams(skpt.LabelPowerset(), dataset_train_x,
                                 dataset_train_y)
    base_classif = SVC(kernel=dict_res['classifier__kernel'],
                       degree=dict_res['classifier__degree'])
    LabelPowerset(dataset_train_x, dataset_train_y, dataset_test_x,
                  dataset_test_y, base_classif, "SVC tuned")

    dict_res = FindBestMNBParams(skpt.LabelPowerset(), dataset_train_x,
                                 dataset_train_y)
    base_classif = MultinomialNB(alpha=dict_res['classifier__alpha'])
    LabelPowerset(dataset_train_x, dataset_train_y, dataset_test_x,
                  dataset_test_y, base_classif, "MNB tuned")

    # MLkNN: the helper's result supplies both 'k' and 's'.
    Util_Title("MLkNN")
    dict_res = FindBestK(skadapt.MLkNN(), dataset_train_x, dataset_train_y)
    MLkNN(dataset_train_x, dataset_train_y, dataset_test_x, dataset_test_y,
          dict_res['k'], dict_res['s'])

    # MLARAM: the helper's result supplies 'vigilance' and 'threshold'.
    Util_Title("MLARAM")
    dict_res = FindBestVT(dataset_train_x, dataset_train_y)
    MLARAM(dataset_train_x, dataset_train_y, dataset_test_x, dataset_test_y,
           dict_res['vigilance'], dict_res['threshold'])

    # BRkNNa: only 'k' is taken from the helper's result.
    Util_Title("BRkNNa")
    dict_res = FindBestK(skadapt.BRkNNaClassifier(), dataset_train_x,
                         dataset_train_y)
    BRkNNa(dataset_train_x, dataset_train_y, dataset_test_x, dataset_test_y,
           dict_res['k'])

    # BRkNNb: only 'k' is taken from the helper's result.
    Util_Title("BRkNNb")
    dict_res = FindBestK(skadapt.BRkNNbClassifier(), dataset_train_x,
                         dataset_train_y)
    BRkNNb(dataset_train_x, dataset_train_y, dataset_test_x, dataset_test_y,
           dict_res['k'])

    # RAkELd: base classifier and labelset size come from the grid search.
    Util_Title("RAkELd")
    dict_res = GridSearchCV_baseRakel(RakelD(), dataset_train_x,
                                      dataset_train_y)
    RAkELd(dataset_train_x, dataset_train_y, dataset_test_x, dataset_test_y,
           dict_res['base_classifier'], dict_res['labelset_size'])

    # RAkELo: additionally needs the model count from the grid search.
    Util_Title("RAkELo")
    dict_res = GridSearchCV_baseRakel(RakelO(), dataset_train_x,
                                      dataset_train_y)
    RAkELO(dataset_train_x, dataset_train_y, dataset_test_x, dataset_test_y,
           dict_res['base_classifier'], dict_res['labelset_size'],
           dict_res['model_count'])

    # MLTSVM: note that this helper is also given the test split.
    Util_Title("MLTSVM")
    dict_res = FindCKParam(dataset_train_x, dataset_train_y, dataset_test_x,
                           dataset_test_y)
    TwinMLSVM(dataset_train_x, dataset_train_y, dataset_test_x, dataset_test_y,
              dict_res['c_k'], dict_res['sor_omega'])
Пример #10
0
 def get_rakeld_with_nb(self):
     """Return a new RakelD built on GaussianNB with dense inputs required.

     The labelset size is taken from ``TEST_LABELSET_SIZE``.
     """
     naive_bayes = GaussianNB()
     return RakelD(
         base_classifier=naive_bayes,
         base_classifier_require_dense=[True, True],
         labelset_size=TEST_LABELSET_SIZE,
     )
Пример #11
0
# Label-space partitioning ensemble: fit on the training split, then collect
# hard predictions and predicted probabilities for the test split.
classifier = LabelSpacePartitioningClassifier(
    problem_transform_classifier,
    clusterer)  # set up the ensemble meta-classifier
classifier.fit(X_train, t_train)
predictions = classifier.predict(X_test)  # author's note: all zeros when SVC was used
probabilities = classifier.predict_proba(X_test)
# The two metric calls below discard their results; they only display a
# value when run interactively (e.g. in a notebook cell).
accuracy_score(t_test,
               predictions)  # author's recorded value: 0.029049295774647887, judged reasonable
mean_squared_error(t_test.toarray(), probabilities.toarray())
# Author's notes: across clustering methods the recorded values were
# walktrap 0.043, greedy 0.029, infomap 0.051.
# In a quick trial over a few parameter combinations, random forest did
# better than boosting.

# RakelD: random label partitioning instead of a clusterer.
# NOTE(review): RakelD is given an already-built LabelPowerset as its first
# positional argument -- confirm this matches the installed scikit-multilearn
# version's expected base classifier.
base_classifier = RandomForestClassifier()
problem_transform_classifier = LabelPowerset(classifier=base_classifier)
classifier = RakelD(problem_transform_classifier,
                    labelset_size=3)  # set up the ensemble meta-classifier
classifier.fit(X_train, t_train)
predictions = classifier.predict(X_test)
probabilities = classifier.predict_proba(X_test)
accuracy_score(
    t_test,
    predictions)  # author's recorded value: 0.0079225352112676055; random partitioning did poorly here
mean_squared_error(t_test.toarray(), probabilities.toarray())

# parameter tuning of space partitioning with clusterer
parameters = {
    'classifier':
    [LabelPowerset()],  # BinaryRelevance performs pretty bad here
    'clusterer': [
        IGraphLabelCooccurenceClusterer('infomap',
                                        weighted=True,
Пример #12
0
    def test_if_works_with_cross_validation(self):
        """RakelD over the NB label-powerset fixture must pass the CV check."""
        nb_powerset = self.get_labelpowerset_with_nb()
        ensemble = RakelD(classifier=nb_powerset, labelset_size=3)

        self.assertClassifierWorksWithCV(ensemble)
Пример #13
0
# Append "_OT" to every axis-1 label of ft_OT (in place).
ft_OT.rename(mapper=lambda x: x + "_OT", axis=1, inplace=True)

# Join the FP and OT feature tables column-wise into one feature matrix.
X = np.concatenate((ft_FP, ft_OT), axis=1)

# Every scorer is evaluated during the search; "absolute true" is the one
# used to pick and refit the best model (see ``refit`` below).
scoring_funcs = {
    "hamming loss": hamming_func,
    "aiming": aiming_func,
    "coverage": coverage_func,
    "accuracy": accuracy_func,
    "absolute true": absolute_true_func,
}  # Keep recorded

parameters = {'labelset_size': [2, 3, 4, 5, 6, 7, 8, 9, 10]}

# RakelD's keyword is ``base_classifier_require_dense``; the previous
# spelling ``baseclassifier_require_dense`` is not a RakelD parameter and
# made the constructor call fail with a TypeError.
rakeld = GridSearchCV(
    RakelD(
        base_classifier=GaussianNB(),
        base_classifier_require_dense=[True, True],
    ),
    param_grid=parameters,
    n_jobs=-1,
    cv=loocv,
    scoring=scoring_funcs,
    verbose=3,
    refit="absolute true",
)

rakeld.fit(X, Y.values)
print(rakeld.best_score_)

# The fitted search object is persisted wrapped in a one-element tuple.
mytuple = (rakeld, )

to_save = dump(mytuple, filename="rakeld.joblib")
Пример #14
0
def _prefixed_param_grids(base_str, grids):
    """Pass three parameter grids through ``redefine`` with prefix ``base_str``.

    ``grids`` is indexed as ``[0]`` logistic regression, ``[1]`` decision
    tree, ``[2]`` naive Bayes. ``redefine`` is called in tree, logistic
    regression, naive Bayes order, and the result is returned as
    ``[logistic regression, tree, naive Bayes]``.
    """
    tree_k, tree_v = list(grids[1].keys()), list(grids[1].values())
    lgr_k, lgr_v = list(grids[0].keys()), list(grids[0].values())
    nb_k, nb_v = list(grids[2].keys()), list(grids[2].values())

    params_tree = redefine(base_str, tree_k, tree_v)
    params_lgr = redefine(base_str, lgr_k, lgr_v)
    params_nb = redefine(base_str, nb_k, nb_v)
    return [params_lgr, params_tree, params_nb]


def pipeline(method, X_train, y_train, scoring, params=None, search_r=True, best=None):
    """Run a hyper-parameter search for three base models under one method.

    Parameters
    ----------
    method : str
        One of ``"CC"`` (classifier chain), ``"RAkEL"`` or
        ``"BinaryRelevance"``.
    X_train, y_train :
        Training data forwarded to ``hyperparameters_search``; ``X_train``
        must expose ``shape`` when ``search_r`` is true.
    scoring :
        Forwarded unchanged to ``hyperparameters_search``.
    params : sequence, optional
        Required when ``search_r`` is false: ``params[0]``, ``params[1]``
        and ``params[2]`` hold the grids for CC, RAkEL and BinaryRelevance
        respectively, each indexed as (logistic regression, decision tree,
        naive Bayes).
    search_r : bool
        When true, random-search distributions are generated here and
        ``params`` is ignored.
    best :
        Forwarded unchanged to ``hyperparameters_search``.

    Returns
    -------
    dict
        Maps each classifier display name to the value returned by
        ``hyperparameters_search`` for it.

    Raises
    ------
    ValueError
        If ``method`` is not one of the three supported names.
    """
    if search_r:
        # Random search params
        r = np.random.uniform(-2, 2, size=5)
        C = np.array(10 ** r)
        alpha = np.random.uniform(0, 1, size=5)

        params_tree = {'__max_depth': sp.randint(1, 30),
                       '__max_features': sp.randint(1, X_train.shape[1]),
                       # Floor division keeps the upper bound an integer;
                       # true division handed randint a float bound.
                       '__min_samples_split': sp.randint(2, X_train.shape[0] // 3),
                       '__criterion': ['gini', 'entropy']}
        params_lgr = {'__C': C}
        params_nb = {'__alpha': alpha}
        generated = (params_lgr, params_tree, params_nb)
    else:
        params_cc, params_rk, params_bn = params[0], params[1], params[2]

    if method == 'CC':
        params = _prefixed_param_grids('base_estimator',
                                       generated if search_r else params_cc)
        print(colored('Fitting Classifiers Chain pipeline...', 'green'))
        classifiers = {
            "Logistic Regression": ClassifierChain(LogisticRegression(random_state=0, solver='lbfgs', n_jobs=-1)),
            "Decision Tree Classifier": ClassifierChain(DecisionTreeClassifier()),
            "MultinomialNB": ClassifierChain(MultinomialNB())}

    elif method == 'RAkEL':
        params = _prefixed_param_grids('base_classifier',
                                       generated if search_r else params_rk)
        print(colored('Fitting RAkEL pipeline...', 'green'))
        classifiers = {"Logistic Regression": RakelD(LogisticRegression(random_state=0, solver='lbfgs', n_jobs=-1)),
                       "Decision Tree Classifier": RakelD(DecisionTreeClassifier(),
                                                          labelset_size=5),
                       "MultinomialNB": RakelD(MultinomialNB(),
                                               labelset_size=5)}

    elif method == 'BinaryRelevance':
        params = _prefixed_param_grids('classifier',
                                       generated if search_r else params_bn)
        print(colored('Fitting BinaryRelevance pipeline...', 'green'))
        classifiers = {
            "Logistic Regression": BinaryRelevance(LogisticRegression(random_state=0, solver='lbfgs', n_jobs=-1)),
            "Decision Tree Classifier": BinaryRelevance(DecisionTreeClassifier()),
            "MultinomialNB": BinaryRelevance(MultinomialNB())}

    else:
        raise ValueError('Invalid method passed. Expected one of: "CC", "RAkEL", "BinaryRelevance", got {} instead'
                         .format(method))

    # ``classifiers`` and ``params`` are both ordered logistic regression,
    # decision tree, naive Bayes, so pairing them positionally is correct.
    res = {}
    for (keys, classifier), par in zip(classifiers.items(), params):
        res[keys] = hyperparameters_search(classifier, par, X_train, y_train, best, scoring, keys,
                                           candidates=30, random_search=search_r)
    # Hand the collected search results back instead of discarding them.
    return res
Пример #15
0
def class_multi_label(x, Y, model, wekamodelname, value):
    """Choose and build a multi-label wrapper for ``model``.

    Parameters
    ----------
    x :
        Not used.
    Y :
        Target array; only ``Y.ndim`` is read. When it equals 1 the problem
        is treated as not multi-label.
    model :
        Base estimator, used by the options that do not go through MEKA
        (5: label powerset, 7: RakelD).
    wekamodelname : str
        WEKA classifier name for the MEKA-backed options; the literal
        ``"nothing"`` means WEKA has no equivalent.
    value : int
        Menu choice 1-9. Any value outside that range makes the function
        print the menu and read the choice from standard input.

    Returns
    -------
    ``model`` unchanged when ``Y.ndim == 1``; otherwise the constructed
    wrapper, or ``0`` when no wrapper could be built (unsupported WEKA
    classifier, or an error while building one).
    """
    # Detect whether the classification data is a multi-label problem.
    num_of_labels = Y.ndim
    print("\n\n-----------------------------------------------------------\n")
    if num_of_labels == 1:
        print("This is not a multi-label problem!!!!!!")
        return model
    javapath = "C:\\Program Files\\Java\\jre1.8.0_251\\bin\\javaw.exe"

    myclasspath = download_meka()
    print(myclasspath)

    # MEKA-backed choices:
    # value -> (progress message, MEKA classifier, suffix for the WEKA name).
    meka_options = {
        1: ("Applying binary relevance",
            "meka.classifiers.multilabel.BR", None),
        2: ("Fourclass Pairwise",
            "meka.classifiers.multilabel.FW", None),
        3: ("Applying calibrated label ranking",
            "meka.classifiers.multilabel.MULAN", " -S CLR"),
        4: ("Applying Chain Classifier",
            "meka.classifiers.multilabel.CC", None),
        6: ("Applying powerset with pruning",
            "meka.classifiers.multilabel.PS", None),
        8: ("Monte-Carlo Classifier Chains",
            "meka.classifiers.multilabel.MCC", None),
    }

    # 0 is the existing "no classifier available" result; starting from it
    # guarantees the final return never hits an unbound name.
    clf = 0
    try:
        while True:
            if (value < 1) or (value > 9):
                print("This is a Multi label problem")
                print("Please select:")
                print("1. For binary relevance")
                print("2. For pairwise comparison")
                print("3. Calibrated label ranking")
                print("4. Chain classifier ")
                print("5. PowerSet no pruning ")
                print("6. PowerSet with pruning ")
                print("7. Random-k Labelsets ")
                print("8. Monte-Carlo Classifier Chains ")
                print("9. Multi Label knn ")
                try:
                    # input() yields text; without int() no branch below
                    # could ever match the typed choice.
                    value = int(input("Please enter a choice:\n"))
                except ValueError:
                    print("Try again!!!!")
                    value = 0
                    continue

            if value in meka_options:
                message, meka_name, weka_suffix = meka_options[value]
                print(message)
                if wekamodelname == "nothing":
                    print("WEKA does not support this classifier")
                    clf = 0
                    break
                weka_name = wekamodelname
                if weka_suffix:
                    weka_name = wekamodelname + weka_suffix
                clf = Meka(
                    meka_classifier=meka_name,
                    weka_classifier=weka_name,
                    meka_classpath=myclasspath,
                    java_command=javapath  # path to java executable
                )
                break
            elif value == 5:
                print("Applying powerset NO pruning")
                clf = LabelPowerset(classifier=model,
                                    require_dense=[False, True])
                break
            elif value == 7:
                print("Applying Random-k Labelsets")
                try:
                    clf = RakelD(base_classifier=model,
                                 base_classifier_require_dense=[False, True],
                                 labelset_size=4)
                except Exception:
                    print("RakelD  exception")
                break
            elif value == 9:
                print("Applying Multilabel k Nearest Neighbours")
                try:
                    clf = MLkNN(k=3)
                except Exception:
                    print("Multilabel k Nearest Neighbours exception")
                break
            else:
                print("Try again!!!!")
                # Force the menu to be shown again; an in-range value that
                # matches no option would otherwise loop forever.
                value = 0
    except Exception:
        # Best-effort by design: report and fall through with whatever
        # ``clf`` holds. KeyboardInterrupt/SystemExit are no longer swallowed.
        print("\nSomething went wrong, but continue\n")
    return clf
Пример #16
0
    def test_if_dense_classification_works_on_dense_base_classifier(self):
        """RakelD over the NB label-powerset fixture must handle dense input."""
        nb_powerset = self.get_labelpowerset_with_nb()
        ensemble = RakelD(classifier=nb_powerset, labelset_size=3)

        self.assertClassifierWorksWithSparsity(ensemble, 'dense')
# Method 3: classifier chain
# Each method below follows the same steps: put the `vare` step in front of
# the multi-label classifier, fit on the training split, predict the test
# split, then compute the multilabel confusion matrix and print the accuracy.

clf = clf = ClassifierChain(classifier=RandomForestClassifier(max_depth=200),
                            require_dense=[False, True])
anova_clf = Pipeline([('anova', vare), ('chaine', clf)])
anova_clf.fit(Xtrain, Ytrain)
pred = anova_clf.predict(Xtest)
matrix = multilabel_confusion_matrix(Ytest, pred)
accuracy = accuracy_score(Ytest, pred)
print(accuracy)

# Method 4: one-vs-rest
clf = OneVsRestClassifier(RandomForestClassifier(n_estimators=100))
anova_clf = Pipeline([('anova', vare), ('oneVSrest', clf)])
anova_clf.fit(Xtrain, Ytrain)
pred = anova_clf.predict(Xtest)
matrix = multilabel_confusion_matrix(Ytest, pred)
accuracy = accuracy_score(Ytest, pred)
print(accuracy)

# Method 5: RakelD (the pipeline step is registered under the name 'Rekel')

clf = clf = RakelD(labelset_size=2, base_classifier=RandomForestClassifier())
anova_clf = Pipeline([('anova', vare), ('Rekel', clf)])
anova_clf.fit(Xtrain, Ytrain)
pred = anova_clf.predict(Xtest)
matrix = multilabel_confusion_matrix(Ytest, pred)
accuracy = accuracy_score(Ytest, pred)
print(accuracy)
Пример #18
0
# Append "_OT" to every axis-1 label of ft_OT (in place).
ft_OT.rename(mapper=lambda x: x + "_OT", axis=1, inplace=True)

# Join the FP and OT feature tables column-wise into one feature matrix.
X = np.concatenate((ft_FP, ft_OT), axis=1)

# Every scorer is evaluated during the search; "absolute true" is the one
# used to pick and refit the best model (see ``refit`` below).
scoring_funcs = {
    "hamming loss": hamming_func,
    "aiming": aiming_func,
    "coverage": coverage_func,
    "accuracy": accuracy_func,
    "absolute true": absolute_true_func,
}  # Keep recorded

parameters = {'labelset_size': [2, 3, 4, 5, 6, 7, 8, 9, 10]}

# RakelD's keyword is ``base_classifier_require_dense``; the previous
# spelling ``baseclassifier_require_dense`` is not a RakelD parameter and
# made the constructor call fail with a TypeError.
rakeld = GridSearchCV(
    RakelD(
        base_classifier=RandomForestClassifier(),
        base_classifier_require_dense=[True, True],
    ),
    param_grid=parameters,
    n_jobs=-1,
    cv=loocv,
    scoring=scoring_funcs,
    verbose=3,
    refit="absolute true",
)

rakeld.fit(X, Y.values)
print(rakeld.best_score_)

# The fitted search object is persisted wrapped in a one-element tuple.
mytuple = (rakeld, )

to_save = dump(mytuple, filename="rakeld-rf.joblib")
Пример #19
0
    X = pca.transform(X)
    svd = TruncatedSVD(dims)
    Xpca = svd.fit_transform(BOW_right)
    #For BOW, uncomment this line: 
    #X_shuf, y_hot_shuf = shuffle(Xpca, y_hot, random_state = 7)
    #For anything else, uncomment this line:
    X_shuf, y_hot_shuf = shuffle(X, y_hot,random_state = 7)

        
        
        
        
    #Five folds
    classifier = RakelD(
                        base_classifier=GaussianNB(),
                        base_classifier_require_dense=[True, True],
                        labelset_size=3
                    )
    kfold = KFold(n_splits=5, random_state = 7)
    #There is randomness inherent in this, so these numbers will change
    scores = cross_val_score(classifier, X_shuf, y_hot_shuf, cv=kfold, scoring='f1_micro')
    print("Scores")
    print(np.mean(scores))
    
    
    kf = KFold(n_splits=5, random_state = 7)
    kf.get_n_splits(X_shuf)
    
    accs = []
    h_scs = []
    for train_index, test_index in kf.split(X_shuf):
Пример #20
0
# Initialize, train and evaluate a RakelD multi-label classifier.

from sklearn.metrics import accuracy_score
from sklearn.metrics import hamming_loss
from sklearn.metrics import f1_score
try:
    from sklearn.metrics import jaccard_similarity_score
except ImportError:
    # jaccard_similarity_score was deprecated in scikit-learn 0.21 and
    # removed in 0.23. Keep the old name available by delegating to its
    # documented replacement, the sample-averaged jaccard_score.
    # NOTE(review): the two may differ for samples whose true and predicted
    # label sets are both empty -- confirm if such rows occur.
    from sklearn.metrics import jaccard_score

    def jaccard_similarity_score(y_true, y_pred):
        """Return the sample-averaged Jaccard score of the predictions."""
        return jaccard_score(y_true, y_pred, average='samples')
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score

from sklearn.naive_bayes import GaussianNB
from skmultilearn.ensemble import RakelD
from datetime import timedelta
import time
start = time.time()

classifier = RakelD(base_classifier=GaussianNB(),
                    base_classifier_require_dense=[True, True],
                    labelset_size=4)

classifier.fit(x_train, y_train)
predictions = classifier.predict(x_test)

# Report the evaluation metrics on the test split.
print("Accuracy = ", accuracy_score(y_test, predictions))
print("\n")
print("F1 = ", f1_score(y_test, predictions, average='micro'))
print("\n")

print("Jaccard = ", jaccard_similarity_score(y_test, predictions))
print("\n")

print("Precision = ", precision_score(y_test, predictions, average='micro'))
Пример #21
0
 def get_rakeld_with_svc(self):
     """Return a new RakelD built on a probability-enabled SVC.

     The labelset size is taken from ``TEST_LABELSET_SIZE``.
     """
     svc = SVC(probability=True)
     return RakelD(
         base_classifier=svc,
         base_classifier_require_dense=[False, True],
         labelset_size=TEST_LABELSET_SIZE,
     )