def classifier_multiclasslinearmachine_modular(
        fm_train_real=traindat,
        fm_test_real=testdat,
        label_train_multiclass=label_traindat,
        width=2.1,
        C=1,
        epsilon=1e-5):
    from shogun.Features import RealFeatures, Labels
    from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC, LinearMulticlassMachine
    from shogun.Classifier import ECOCStrategy, ECOCOVREncoder, ECOCHDDecoder, MulticlassOneVsRestStrategy

    feats_train = RealFeatures(fm_train_real)
    feats_test  = RealFeatures(fm_test_real)

    labels = Labels(label_train_multiclass)

    classifier = LibLinear(L2R_L2LOSS_SVC)
    classifier.set_epsilon(epsilon)
    classifier.set_bias_enabled(True)

    mc_classifier = LinearMulticlassMachine(MulticlassOneVsRestStrategy(), feats_train, classifier, labels)
    mc_classifier.train()
    out_mc = mc_classifier.apply(feats_test).get_labels()

    ecoc_strategy = ECOCStrategy(ECOCOVREncoder(), ECOCHDDecoder())
    ecoc_classifier = LinearMulticlassMachine(ecoc_strategy, feats_train, classifier, labels)
    ecoc_classifier.train()
    out_ecoc = ecoc_classifier.apply(feats_test).get_labels()

    n_diff = (out_mc != out_ecoc).sum()
    if n_diff == 0:
        print("Same results for OvR and ECOCOvR")
    else:
        print("Different results for OvR and ECOCOvR (%d out of %d are different)" % (n_diff, len(out_mc)))

    return out_ecoc, out_mc
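
The examples in this listing rely on module-level arrays traindat, testdat, label_traindat and label_testdat that are never defined here; they must already exist when the function definitions execute, since they appear as default arguments. A minimal sketch of compatible toy data (an assumption, not part of the original listing): Shogun's RealFeatures expects a features-by-examples float64 matrix, and the label classes expect a 1-D float64 vector, with class indices 0..K-1 for multiclass labels.

import numpy

numpy.random.seed(17)
num_classes, dim, n = 3, 2, 50
# three Gaussian blobs, one per class, shifted apart along both axes
traindat = numpy.hstack(
    [numpy.random.randn(dim, n) + 4.0 * k for k in range(num_classes)])
testdat = numpy.hstack(
    [numpy.random.randn(dim, n) + 4.0 * k for k in range(num_classes)])
label_traindat = numpy.repeat(
    numpy.arange(num_classes), n).astype(numpy.float64)
label_testdat = label_traindat.copy()

# with these in scope, the example above runs as-is:
out_ecoc, out_mc = classifier_multiclasslinearmachine_modular(
    traindat, testdat, label_traindat)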
Example #2
def create_svm(param, data, lab):
    """
    create SVM object with standard settings
    
    @param param: parameter object
    @param data: kernel or feature object (for kernelized/linear svm)
    @param lab: label object
    
    @return: svm object
    """

    # imports for the two supported back-ends and their solver-type constants
    from shogun.Classifier import LibLinear, SVMLight
    from shogun.Classifier import L2R_L2LOSS_SVC_DUAL, L2R_LR

    # create SVM
    if param.flags.get("svm_type") == "liblineardual":
        print("creating LibLinear object")
        svm = LibLinear(param.cost, data, lab)
        svm.set_liblinear_solver_type(L2R_L2LOSS_SVC_DUAL)

        # optionally switch the solver to L2-regularized logistic regression
        if param.flags.get("solver_type") == "L2R_LR":
            print("setting linear solver type to: L2R_LR")
            svm.set_liblinear_solver_type(L2R_LR)

    else:
        print("creating SVMLight object")
        svm = SVMLight(param.cost, data, lab)

    return set_svm_parameters(svm, param)
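
create_svm expects a parameter object with a cost attribute and a flags dict, and relies on a set_svm_parameters helper defined elsewhere in the module. A hypothetical stand-in, only to illustrate the expected shape of param:

class Param(object):
    # minimal stand-in for the project's parameter object (an assumption,
    # not the project's actual class)
    def __init__(self, cost, flags):
        self.cost = cost    # SVM regularization constant C
        self.flags = flags  # e.g. {"svm_type": "liblineardual"}

# param = Param(cost=1.0, flags={"svm_type": "liblineardual"})
# svm = create_svm(param, feats_train, labels)  # needs set_svm_parameters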
Example #3
def classifier_multiclasslinearmachine_modular(
        fm_train_real=traindat,
        fm_test_real=testdat,
        label_train_multiclass=label_traindat,
        label_test_multiclass=label_testdat,
        width=2.1,
        C=1,
        epsilon=1e-5):
    from shogun.Features import RealFeatures, MulticlassLabels
    from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC, LinearMulticlassMachine, MulticlassOneVsOneStrategy, MulticlassOneVsRestStrategy

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)

    labels = MulticlassLabels(label_train_multiclass)

    classifier = LibLinear(L2R_L2LOSS_SVC)
    classifier.set_epsilon(epsilon)
    classifier.set_bias_enabled(True)
    mc_classifier = LinearMulticlassMachine(MulticlassOneVsOneStrategy(),
                                            feats_train, classifier, labels)

    mc_classifier.train()
    label_pred = mc_classifier.apply(feats_test)
    out = label_pred.get_labels()

    if label_test_multiclass is not None:
        from shogun.Evaluation import MulticlassAccuracy
        labels_test = MulticlassLabels(label_test_multiclass)
        evaluator = MulticlassAccuracy()
        acc = evaluator.evaluate(label_pred, labels_test)
        print('Accuracy = %.4f' % acc)

    return out
def modelselection_grid_search_linear_modular(traindat=traindat,
                                              label_traindat=label_traindat):
    from shogun.Evaluation import CrossValidation, CrossValidationResult
    from shogun.Evaluation import ContingencyTableEvaluation, ACCURACY
    from shogun.Evaluation import StratifiedCrossValidationSplitting
    from shogun.ModelSelection import GridSearchModelSelection
    from shogun.ModelSelection import ModelSelectionParameters, R_EXP
    from shogun.ModelSelection import ParameterCombination
    from shogun.Features import Labels
    from shogun.Features import RealFeatures
    from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC

    # build parameter tree to select C1 and C2
    param_tree_root = ModelSelectionParameters()
    c1 = ModelSelectionParameters("C1")
    param_tree_root.append_child(c1)
    c1.build_values(-2.0, 2.0, R_EXP)

    c2 = ModelSelectionParameters("C2")
    param_tree_root.append_child(c2)
    c2.build_values(-2.0, 2.0, R_EXP)

    # training data
    features = RealFeatures(traindat)
    labels = Labels(label_traindat)

    # classifier
    classifier = LibLinear(L2R_L2LOSS_SVC)

    # print all parameters available for model selection
    # (if yours is not listed, write to the mailing list)
    classifier.print_modsel_params()

    # splitting strategy for cross-validation
    splitting_strategy = StratifiedCrossValidationSplitting(labels, 10)

    # evaluation method
    evaluation_criterium = ContingencyTableEvaluation(ACCURACY)

    # cross-validation instance
    cross_validation = CrossValidation(classifier, features, labels,
                                       splitting_strategy,
                                       evaluation_criterium)

    # model selection instance
    model_selection = GridSearchModelSelection(param_tree_root,
                                               cross_validation)

    # perform model selection with the chosen methods
    #print("performing model selection of")
    #param_tree_root.print_tree()
    best_parameters = model_selection.select_model()

    # print best parameters
    #print("best parameters:")
    #best_parameters.print_tree()

    # apply them and print the cross-validation estimate
    best_parameters.apply_to_machine(classifier)
    result = cross_validation.evaluate()
    print('Accuracy: %.4f' % result.mean)
def features_director_dot_modular(fm_train_real, fm_test_real,
                                  label_train_twoclass, C, epsilon):

    from shogun.Features import RealFeatures, SparseRealFeatures, BinaryLabels
    from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC_DUAL
    from shogun.Mathematics import Math_init_random
    Math_init_random(17)

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)
    labels = BinaryLabels(label_train_twoclass)

    # NumpyFeatures is a DirectorDotFeatures subclass defined elsewhere in the
    # original example; it reimplements the dot-feature interface in Python
    dfeats_train = NumpyFeatures(fm_train_real)
    dfeats_test = NumpyFeatures(fm_test_real)
    dlabels = BinaryLabels(label_train_twoclass)

    print(feats_train.get_computed_dot_feature_matrix())
    print(dfeats_train.get_computed_dot_feature_matrix())

    svm = LibLinear(C, feats_train, labels)
    svm.set_liblinear_solver_type(L2R_L2LOSS_SVC_DUAL)
    svm.set_epsilon(epsilon)
    svm.set_bias_enabled(True)
    svm.train()

    svm.set_features(feats_test)
    predictions = svm.apply()

    dfeats_train.__disown__()
    dfeats_train.parallel.set_num_threads(1)
    dsvm = LibLinear(C, dfeats_train, dlabels)
    dsvm.set_liblinear_solver_type(L2R_L2LOSS_SVC_DUAL)
    dsvm.set_epsilon(epsilon)
    dsvm.set_bias_enabled(True)
    dsvm.train()

    dfeats_test.__disown__()
    dfeats_test.parallel.set_num_threads(1)
    dsvm.set_features(dfeats_test)
    dpredictions = dsvm.apply()

    return predictions, svm, predictions.get_labels()
def train_svm(feats_train, labels, C=1):
    from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC, L2R_L2LOSS_SVC_DUAL

    epsilon = 1e-3
    svm = LibLinear(C, feats_train, labels)
    svm.set_liblinear_solver_type(L2R_L2LOSS_SVC)
    svm.set_epsilon(epsilon)
    svm.set_bias_enabled(False)
    svm.train()

    return svm
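
A minimal usage sketch for train_svm (assumed toy data, not part of the original listing): two Gaussian clouds with labels in {-1, +1}, matching what BinaryLabels expects.

import numpy
from shogun.Features import RealFeatures, BinaryLabels

X = numpy.hstack((numpy.random.randn(2, 30) + 3.0, numpy.random.randn(2, 30)))
y = numpy.hstack((numpy.ones(30), -numpy.ones(30)))

svm = train_svm(RealFeatures(X), BinaryLabels(y), C=1)
print(svm.get_w())  # learned weight vector (bias disabled above)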
Example #7
def classifier_multiclass_ecoc_ovr(fm_train_real=traindat,
                                   fm_test_real=testdat,
                                   label_train_multiclass=label_traindat,
                                   label_test_multiclass=label_testdat,
                                   lawidth=2.1,
                                   C=1,
                                   epsilon=1e-5):
    from shogun.Features import RealFeatures, MulticlassLabels
    from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC, LinearMulticlassMachine
    from shogun.Classifier import ECOCStrategy, ECOCOVREncoder, ECOCLLBDecoder, MulticlassOneVsRestStrategy

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)

    labels = MulticlassLabels(label_train_multiclass)

    classifier = LibLinear(L2R_L2LOSS_SVC)
    classifier.set_epsilon(epsilon)
    classifier.set_bias_enabled(True)

    mc_classifier = LinearMulticlassMachine(MulticlassOneVsRestStrategy(),
                                            feats_train, classifier, labels)
    mc_classifier.train()
    label_mc = mc_classifier.apply(feats_test)
    out_mc = label_mc.get_labels()

    ecoc_strategy = ECOCStrategy(ECOCOVREncoder(), ECOCLLBDecoder())
    ecoc_classifier = LinearMulticlassMachine(ecoc_strategy, feats_train,
                                              classifier, labels)
    ecoc_classifier.train()
    label_ecoc = ecoc_classifier.apply(feats_test)
    out_ecoc = label_ecoc.get_labels()

    n_diff = (out_mc != out_ecoc).sum()
    if n_diff == 0:
        print("Same results for OvR and ECOCOvR")
    else:
        print(
            "Different results for OvR and ECOCOvR (%d out of %d are different)"
            % (n_diff, len(out_mc)))

    if label_test_multiclass is not None:
        from shogun.Evaluation import MulticlassAccuracy
        labels_test = MulticlassLabels(label_test_multiclass)
        evaluator = MulticlassAccuracy()
        acc_mc = evaluator.evaluate(label_mc, labels_test)
        acc_ecoc = evaluator.evaluate(label_ecoc, labels_test)
        print('Normal OVR Accuracy = %.4f' % acc_mc)
        print('ECOC OVR Accuracy   = %.4f' % acc_ecoc)

    return out_ecoc, out_mc
Example #8
def classifier_multiclass_ecoc_random(fm_train_real=traindat,
                                      fm_test_real=testdat,
                                      label_train_multiclass=label_traindat,
                                      label_test_multiclass=label_testdat,
                                      lawidth=2.1,
                                      C=1,
                                      epsilon=1e-5):
    from shogun.Features import RealFeatures, MulticlassLabels
    from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC, LinearMulticlassMachine
    from shogun.Classifier import ECOCStrategy, ECOCRandomSparseEncoder, ECOCRandomDenseEncoder, ECOCHDDecoder

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)

    labels = MulticlassLabels(label_train_multiclass)

    classifier = LibLinear(L2R_L2LOSS_SVC)
    classifier.set_epsilon(epsilon)
    classifier.set_bias_enabled(True)

    rnd_dense_strategy = ECOCStrategy(ECOCRandomDenseEncoder(),
                                      ECOCHDDecoder())
    rnd_sparse_strategy = ECOCStrategy(ECOCRandomSparseEncoder(),
                                       ECOCHDDecoder())

    dense_classifier = LinearMulticlassMachine(rnd_dense_strategy, feats_train,
                                               classifier, labels)
    dense_classifier.train()
    label_dense = dense_classifier.apply(feats_test)
    out_dense = label_dense.get_labels()

    sparse_classifier = LinearMulticlassMachine(rnd_sparse_strategy,
                                                feats_train, classifier,
                                                labels)
    sparse_classifier.train()
    label_sparse = sparse_classifier.apply(feats_test)
    out_sparse = label_sparse.get_labels()

    if label_test_multiclass is not None:
        from shogun.Evaluation import MulticlassAccuracy
        labels_test = MulticlassLabels(label_test_multiclass)
        evaluator = MulticlassAccuracy()
        acc_dense = evaluator.evaluate(label_dense, labels_test)
        acc_sparse = evaluator.evaluate(label_sparse, labels_test)
        print('Random Dense Accuracy  = %.4f' % acc_dense)
        print('Random Sparse Accuracy = %.4f' % acc_sparse)

    return out_sparse, out_dense
Example #9
def classifier_multiclasslinearmachine_modular(
        fm_train_real=traindat,
        fm_test_real=testdat,
        label_train_multiclass=label_traindat,
        label_test_multiclass=label_testdat,
        lawidth=2.1,
        C=1,
        epsilon=1e-5):
    from shogun.Features import RealFeatures, MulticlassLabels
    from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC, LinearMulticlassMachine
    from shogun.Classifier import ECOCStrategy, ECOCDiscriminantEncoder, ECOCHDDecoder

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)

    labels = MulticlassLabels(label_train_multiclass)

    classifier = LibLinear(L2R_L2LOSS_SVC)
    classifier.set_epsilon(epsilon)
    classifier.set_bias_enabled(True)

    encoder = ECOCDiscriminantEncoder()
    encoder.set_features(feats_train)
    encoder.set_labels(labels)
    encoder.set_sffs_iterations(50)

    strategy = ECOCStrategy(encoder, ECOCHDDecoder())

    classifier = LinearMulticlassMachine(strategy, feats_train, classifier,
                                         labels)
    classifier.train()
    label_pred = classifier.apply(feats_test)
    out = label_pred.get_labels()

    if label_test_multiclass is not None:
        from shogun.Evaluation import MulticlassAccuracy
        labels_test = MulticlassLabels(label_test_multiclass)
        evaluator = MulticlassAccuracy()
        acc = evaluator.evaluate(label_pred, labels_test)
        print('Accuracy = %.4f' % acc)

    return out
Example #10
def evaluation_cross_validation_classification(traindat=traindat,
                                               label_traindat=label_traindat):
    from shogun.Evaluation import CrossValidation, CrossValidationResult
    from shogun.Evaluation import ContingencyTableEvaluation, ACCURACY
    from shogun.Evaluation import StratifiedCrossValidationSplitting
    from shogun.Features import Labels
    from shogun.Features import RealFeatures
    from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC

    # training data
    features = RealFeatures(traindat)
    labels = Labels(label_traindat)

    # classifier
    classifier = LibLinear(L2R_L2LOSS_SVC)

    # splitting strategy for 5-fold cross-validation (stratified splitting is
    # preferable for classification; the plain CrossValidationSplitting is
    # also available)
    splitting_strategy = StratifiedCrossValidationSplitting(labels, 5)

    # evaluation method
    evaluation_criterium = ContingencyTableEvaluation(ACCURACY)

    # cross-validation instance
    cross_validation = CrossValidation(classifier, features, labels,
                                       splitting_strategy,
                                       evaluation_criterium)

    # (optional) repeat x-val 10 times
    cross_validation.set_num_runs(10)

    # (optional) request 95% confidence intervals for results (not actually needed
    # for this toy example)
    cross_validation.set_conf_int_alpha(0.05)

    # perform cross-validation and print results
    result = cross_validation.evaluate()
    print "mean:", result.mean
    if result.has_conf_int:
        print "[", result.conf_int_low, ",", result.conf_int_up, "] with alpha=", result.conf_int_alpha
Example #11
def solver_dcd_shogun_debug(C, all_xt, all_lt, task_indicator, M, L):
    """
    use standard LibLinear for debugging purposes
    """

    xt = numpy.array(all_xt)
    lt = numpy.array(all_lt)
    tt = numpy.array(task_indicator, dtype=numpy.int32)
    tsm = numpy.array(M)
    num_tasks = L.shape[0]

    # sanity checks
    assert len(xt) == len(lt) == len(tt)
    assert M.shape == L.shape
    assert num_tasks == len(set(tt))

    # set up shogun objects (create_hashed_features_wdk is a project-local
    # helper used only for string inputs)
    if type(xt[0]) == str:
        feat = create_hashed_features_wdk(xt, 8)
    else:
        feat = RealFeatures(xt.T)

    lab = Labels(lt)

    # set up machinery
    svm = LibLinear()
    svm.set_liblinear_solver_type(L2R_L1LOSS_SVC_DUAL)
    svm.io.set_loglevel(MSG_DEBUG)

    svm.set_C(C, C)
    svm.set_bias_enabled(False)

    # invoke training
    svm.set_labels(lab)
    svm.train(feat)

    # get model parameters (the 42s are placeholders for objective values
    # this debugging variant does not compute)
    W = [svm.get_w()]

    return W, 42, 42
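
solver_dcd_shogun_debug takes per-example task indicators plus task matrices M and L; only L's size is actually used above. A hypothetical two-task call, shapes only (an assumption, not from the source):

import numpy

all_xt = numpy.random.randn(20, 5)                        # 20 examples, 5 features
all_lt = numpy.hstack((numpy.ones(10), -numpy.ones(10)))  # labels in {-1, +1}
task_indicator = [0] * 10 + [1] * 10                      # first half -> task 0
M = L = numpy.eye(2)                                      # trivial task similarity

W, _, _ = solver_dcd_shogun_debug(1.0, all_xt, all_lt, task_indicator, M, L)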
Example #12
def classifier_liblinear_modular(fm_train_real, fm_test_real,
                                 label_train_twoclass, C, epsilon):

    from shogun.Features import RealFeatures, SparseRealFeatures, Labels
    from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC_DUAL
    from shogun.Mathematics import Math_init_random
    Math_init_random(17)

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)
    labels = Labels(label_train_twoclass)

    svm = LibLinear(C, feats_train, labels)
    svm.set_liblinear_solver_type(L2R_L2LOSS_SVC_DUAL)
    svm.set_epsilon(epsilon)
    svm.set_bias_enabled(True)
    svm.train()

    svm.set_features(feats_test)
    predictions = svm.apply()
    return predictions, svm, predictions.get_labels()
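
A usage sketch for the two-class LibLinear example (assumed toy data, as before):

import numpy

numpy.random.seed(17)
fm_train = numpy.hstack((numpy.random.randn(2, 40) + 3.0,
                         numpy.random.randn(2, 40)))
fm_test = numpy.hstack((numpy.random.randn(2, 10) + 3.0,
                        numpy.random.randn(2, 10)))
y_train = numpy.hstack((numpy.ones(40), -numpy.ones(40)))

predictions, svm, out = classifier_liblinear_modular(
    fm_train, fm_test, y_train, C=0.9, epsilon=1e-5)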
def classifier_multiclasslinearmachine_modular(
        fm_train_real=traindat,
        fm_test_real=testdat,
        label_train_multiclass=label_traindat,
        width=2.1,
        C=1,
        epsilon=1e-5):
    from shogun.Features import RealFeatures, Labels
    from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC, LinearMulticlassMachine
    from shogun.Classifier import ECOCStrategy, ECOCRandomSparseEncoder, ECOCRandomDenseEncoder, ECOCHDDecoder

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)

    labels = Labels(label_train_multiclass)

    classifier = LibLinear(L2R_L2LOSS_SVC)
    classifier.set_epsilon(epsilon)
    classifier.set_bias_enabled(True)

    rnd_dense_strategy = ECOCStrategy(ECOCRandomDenseEncoder(),
                                      ECOCHDDecoder())
    rnd_sparse_strategy = ECOCStrategy(ECOCRandomSparseEncoder(),
                                       ECOCHDDecoder())

    dense_classifier = LinearMulticlassMachine(rnd_dense_strategy, feats_train,
                                               classifier, labels)
    dense_classifier.train()
    out_dense = dense_classifier.apply(feats_test).get_labels()

    sparse_classifier = LinearMulticlassMachine(rnd_sparse_strategy,
                                                feats_train, classifier,
                                                labels)
    sparse_classifier.train()
    out_sparse = sparse_classifier.apply(feats_test).get_labels()

    return out_sparse, out_dense
def classifier_multiclasslinearmachine_modular(
        fm_train_real=traindat,
        fm_test_real=testdat,
        label_train_multiclass=label_traindat,
        width=2.1,
        C=1,
        epsilon=1e-5):
    from shogun.Features import RealFeatures, MulticlassLabels
    from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC, LinearMulticlassMachine, MulticlassOneVsOneStrategy, MulticlassOneVsRestStrategy

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)

    labels = MulticlassLabels(label_train_multiclass)

    classifier = LibLinear(L2R_L2LOSS_SVC)
    classifier.set_epsilon(epsilon)
    classifier.set_bias_enabled(True)
    mc_classifier = LinearMulticlassMachine(MulticlassOneVsOneStrategy(),
                                            feats_train, classifier, labels)

    mc_classifier.train()
    out = mc_classifier.apply(feats_test).get_labels()
    return out
Example #15
def get_presvm(B=2.0):

    import numpy
    from shogun.Features import RealFeatures, Labels
    from shogun.Kernel import LinearKernel
    # DomainAdaptationSVMLinear requires a shogun build with LibLinear support
    from shogun.Classifier import LibLinear, DomainAdaptationSVMLinear

    examples_presvm = [numpy.array([ 2.1788894 ,  3.89163458,  5.55086917,  6.4022742 ,  3.14964751, -0.4622959 ,  5.38538904,  5.9962938 ,  6.29690849]),
     numpy.array([ 2.1788894 ,  3.89163458,  5.55086917,  6.4022742 ,  3.14964751,  -0.4622959 ,  5.38538904,  5.9962938 ,  6.29690849]),
     numpy.array([ 0.93099452,  0.38871617,  1.57968949,  1.25672527, -0.8123137 ,   0.20786586,  1.378121  ,  1.15598866,  0.80265343]),
     numpy.array([ 0.68705535,  0.15144113, -0.81306157, -0.7664577 ,  1.16452945,  -0.2712956 ,  0.483094  , -0.16302007, -0.39094812]),
     numpy.array([-0.71374437, -0.16851719,  1.43826895,  0.95961166, -0.2360497 ,  -0.30425755,  1.63157052,  1.15990427,  0.63801465]),
     numpy.array([ 0.68705535,  0.15144113, -0.81306157, -0.7664577 ,  1.16452945, -0.2712956 ,  0.483094  , -0.16302007, -0.39094812]),
     numpy.array([-0.71374437, -0.16851719,  1.43826895,  0.95961166, -0.2360497 , -0.30425755,  1.63157052,  1.15990427,  0.63801465]),
     numpy.array([-0.98028302, -0.23974489,  2.1687206 ,  1.99338824, -0.67070205, -0.33167281,  1.3500379 ,  1.34915685,  1.13747975]),
     numpy.array([ 0.67109612,  0.12662017, -0.48254886, -0.49091898,  1.31522237, -0.34108933,  0.57832179, -0.01992828, -0.26581628]),
     numpy.array([ 0.3193611 ,  0.44903416,  3.62187778,  4.1490827 ,  1.58832961,  1.95583397,  1.36836023,  1.92521945,  2.41114998])]
    labels_presvm = [-1.0, -1.0, 1.0, 1.0, 1.0, -1.0, -1.0, -1.0, -1.0, 1.0]

    examples = [numpy.array([-0.49144487, -0.19932263, -0.00408188, -0.21262012,  0.14621013, -0.50415481,  0.32317317, -0.00317602, -0.21422637]), 
     numpy.array([ 0.0511817 , -0.04226666, -0.30454651, -0.38759116,  0.31639514,  0.32558471,  0.49364473,  0.04515591, -0.06963456]),
     numpy.array([-0.30324369, -0.11909251, -0.03210278, -0.2779561 ,  1.31488853, -0.33165365,  0.60176018, -0.00384946, -0.15603975]),
     numpy.array([ 0.59282756, -0.0039991 , -0.26028983, -0.26722552,  1.63314995, -0.51199338,  0.33340685, -0.0170519 , -0.19211039]),
     numpy.array([-0.18338766, -0.07783465,  0.42019824,  0.201753  ,  2.01160098,  0.33326111,  0.75591909,  0.36631525,  0.1761829 ]),
     numpy.array([ 0.10273793, -0.02189574,  0.91092358,  0.74827973,  0.51882902, -0.1286531 ,  0.64463658,  0.67468349,  0.55587266]),
     numpy.array([-0.09727099, -0.13413522,  0.18771062,  0.19411594,  1.48547364, -0.43169608,  0.55064534,  0.24331473,  0.10878847]),
     numpy.array([-0.22494375, -0.15492964,  0.28017737,  0.29794467,  0.96403895,  0.43880289,  0.08053425,  0.07456818,  0.12102371]),
     numpy.array([-0.18161417, -0.17692039,  0.19554942, -0.00785625,  1.38315115, -0.05923183, -0.05723568, -0.15463646, -0.24249483]),
     numpy.array([-0.36538359, -0.20040061, -0.38384388, -0.40206556, -0.25040256,  0.94205875,  0.40162798,  0.00327328, -0.24107393])]

    labels = [-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, 1.0, 1.0, 1.0, -1.0]

    examples_test = [numpy.array([-0.45159799, -0.11401394,  1.28574573,  1.09144306,  0.92253119,  -0.47230164,  0.77032486,  0.83047366,  0.74768906]),
     numpy.array([ 0.42613105,  0.0092778 , -0.78640296, -0.71632445,  0.41154244,   0.88380309,  0.19475759, -0.14195876, -0.30479425]),
     numpy.array([-0.09727099, -0.13413522,  0.18771062,  0.19411594,  1.48547364,  -0.43169608,  0.55064534,  0.24331473,  0.10878847]),
     numpy.array([ 0.11558796, -0.08867647, -0.26432074, -0.30924546, -1.08243017,  -0.1339607 , -0.1956124 , -0.2428358 , -0.25761213]),
     numpy.array([ 1.23679696,  0.18753081, -0.25593329, -0.12051991,  0.64976989,  -0.17184101,  0.14951337,  0.01988587, -0.0356698 ]),
     numpy.array([ 1.03355002,  0.05316195, -0.97905368, -0.75482121,  0.28673776,   2.27142733,  0.02654739, -0.31109851, -0.44555277]),
     numpy.array([-0.53662325, -0.21434756, -0.12105795, -0.27531257,  0.66947047,   0.05474302, -0.00717455, -0.17700575, -0.22253444]),
     numpy.array([ 0.11272632, -0.12674826, -0.49736457, -0.51445609,  0.88518932,  -0.51558669, -0.12000557, -0.32973613, -0.38488736]),
     numpy.array([ 0.8372111 ,  0.06972199, -1.00454229, -0.79869642,  1.19376333,  -0.40160273, -0.25122157, -0.46417918, -0.50234858]),
     numpy.array([-0.36325018, -0.12206184,  0.10525247, -0.15663416,  1.03616948,  -0.51699463,  0.59566286,  0.35363369,  0.10545559])]


    #############################################
    #    compute pre-svm
    #############################################


    # create real-valued features as first step
    examples_presvm = numpy.array(examples_presvm, dtype=numpy.float64)
    examples_presvm = numpy.transpose(examples_presvm)

    feat_presvm = RealFeatures(examples_presvm)
    lab_presvm = Labels(numpy.array(labels_presvm))
    wdk_presvm = LinearKernel(feat_presvm, feat_presvm)

    presvm_liblinear = LibLinear(1, feat_presvm, lab_presvm)
    presvm_liblinear.set_max_iterations(10000)
    presvm_liblinear.set_bias_enabled(False)
    presvm_liblinear.train()


    #############################################
    #    compute linear term manually
    #############################################

    examples = numpy.array(examples, dtype=numpy.float64)
    examples = numpy.transpose(examples)

    feat = RealFeatures(examples)
    lab = Labels(numpy.array(labels))

    dasvm_liblinear = DomainAdaptationSVMLinear(1.0, feat, lab, presvm_liblinear, B)
    dasvm_liblinear.set_bias_enabled(False)
    dasvm_liblinear.train()

    helper.save("/tmp/svm", presvm_liblinear)
    presvm_pickle = helper.load("/tmp/svm")

    dasvm_pickle = DomainAdaptationSVMLinear(1.0, feat, lab, presvm_pickle, B)
    dasvm_pickle.set_bias_enabled(False)
    dasvm_pickle.train()

    helper.save("/tmp/dasvm", dasvm_liblinear)
    dasvm_pickle2 = helper.load("/tmp/dasvm")

    #############################################
    #    load test data
    #############################################

    examples_test = numpy.array(examples_test, dtype=numpy.float64)
    examples_test = numpy.transpose(examples_test)
    feat_test = RealFeatures(examples_test)

    # check if pickled and unpickled classifiers behave the same
    out1 = dasvm_liblinear.classify(feat_test).get_labels()
    out2 = dasvm_pickle.classify(feat_test).get_labels()

    # compare outputs
    for i in range(len(out1)):
        try:
            assert(abs(out1[i] - out2[i]) <= 0.001)
        except AssertionError:
            print("(%.5f, %.5f)" % (out1[i], out2[i]))

    print("classification agrees.")
Example #16
import re

import shogun.Classifier as Classifier
from shogun.Classifier import ECOCStrategy, LibLinear, L2R_L2LOSS_SVC, LinearMulticlassMachine
from shogun.Evaluation import MulticlassAccuracy
from shogun.Features import RealFeatures, MulticlassLabels


def nonabstract_class(name):
    # keep only the ECOC encoder/decoder classes that can be instantiated
    try:
        getattr(Classifier, name)()
    except TypeError:
        return False
    return True


encoders = [
    x for x in dir(Classifier)
    if re.match(r'ECOC.+Encoder', x) and nonabstract_class(x)
]
decoders = [
    x for x in dir(Classifier)
    if re.match(r'ECOC.+Decoder', x) and nonabstract_class(x)
]

fea_train = RealFeatures(traindat)
fea_test = RealFeatures(testdat)
gnd_train = MulticlassLabels(label_traindat)
if label_testdat is None:
    gnd_test = None
else:
    gnd_test = MulticlassLabels(label_testdat)

base_classifier = LibLinear(L2R_L2LOSS_SVC)
base_classifier.set_bias_enabled(True)

print('Testing with %d encoders and %d decoders' %
      (len(encoders), len(decoders)))
print('-' * 70)
format_str = '%%15s + %%-10s  %%-10%s %%-10%s %%-10%s'
print((format_str % ('s', 's', 's')) %
      ('encoder', 'decoder', 'codelen', 'time', 'accuracy'))


def run_ecoc(ier, idr):
    encoder = getattr(Classifier, encoders[ier])()
    decoder = getattr(Classifier, decoders[idr])()

    # data-dependent encoders need the training features and labels
    if hasattr(encoder, 'set_labels'):
        encoder.set_labels(gnd_train)
        encoder.set_features(fea_train)

    strategy = ECOCStrategy(encoder, decoder)
    classifier = LinearMulticlassMachine(strategy, fea_train,
                                         base_classifier, gnd_train)
    classifier.train()
    label_pred = classifier.apply(fea_test)
    if gnd_test is not None:
        evaluator = MulticlassAccuracy()
        acc = evaluator.evaluate(label_pred, gnd_test)
    else:
        acc = None

    return (classifier.get_num_machines(), acc)
Example #17
# (the examples_presvm / labels_presvm training arrays are truncated in the
# source listing; compare Example #15 above)

#############################################
#    compute pre-svm
#############################################

# create real-valued features as first step
examples_presvm = numpy.array(examples_presvm, dtype=numpy.float64)
examples_presvm = numpy.transpose(examples_presvm)

feat_presvm = RealFeatures(examples_presvm)
lab_presvm = Labels(numpy.array(labels_presvm))
wdk_presvm = LinearKernel(feat_presvm, feat_presvm)

presvm_liblinear = LibLinear(1, feat_presvm, lab_presvm)
presvm_liblinear.set_max_iterations(10000)
presvm_liblinear.set_bias_enabled(False)
presvm_liblinear.train()

presvm_libsvm = LibSVM(1, wdk_presvm, lab_presvm)
#presvm_libsvm = SVMLight(1, wdk_presvm, lab_presvm)

#presvm_libsvm.io.set_loglevel(MSG_DEBUG)
presvm_libsvm.set_bias_enabled(False)
presvm_libsvm.train()

my_w = presvm_liblinear.get_w()
presvm_liblinear = LibLinear(1, feat_presvm, lab_presvm)
presvm_liblinear.set_w(my_w)
Example #18
def classifier_multiclass_ecoc (fm_train_real=traindat,fm_test_real=testdat,label_train_multiclass=label_traindat,label_test_multiclass=label_testdat,lawidth=2.1,C=1,epsilon=1e-5):

	import re
	import time

	import shogun.Classifier as Classifier
	from shogun.Classifier import ECOCStrategy, LibLinear, L2R_L2LOSS_SVC, LinearMulticlassMachine
	from shogun.Evaluation import MulticlassAccuracy
	from shogun.Features import RealFeatures, MulticlassLabels

	def nonabstract_class(name):
		try:
		    getattr(Classifier, name)()
		except TypeError:
		    return False
		return True

	encoders = [x for x in dir(Classifier)
		    if re.match(r'ECOC.+Encoder', x) and nonabstract_class(x)]
	decoders = [x for x in dir(Classifier)
		    if re.match(r'ECOC.+Decoder', x) and nonabstract_class(x)]

	fea_train = RealFeatures(fm_train_real)
	fea_test  = RealFeatures(fm_test_real)
	gnd_train = MulticlassLabels(label_train_multiclass)
	if label_test_multiclass is None:
		gnd_test = None
	else:
		gnd_test = MulticlassLabels(label_test_multiclass)

	base_classifier = LibLinear(L2R_L2LOSS_SVC)
	base_classifier.set_bias_enabled(True)

	#print('Testing with %d encoders and %d decoders' % (len(encoders), len(decoders)))
	#print('-' * 70)
	#format_str = '%%15s + %%-10s  %%-10%s %%-10%s %%-10%s'
	#print((format_str % ('s', 's', 's')) % ('encoder', 'decoder', 'codelen', 'time', 'accuracy'))

	def run_ecoc(ier, idr):
		encoder = getattr(Classifier, encoders[ier])()
		decoder = getattr(Classifier, decoders[idr])()

		# whether encoder is data dependent
		if hasattr(encoder, 'set_labels'):
		    encoder.set_labels(gnd_train)
		    encoder.set_features(fea_train)

		strategy = ECOCStrategy(encoder, decoder)
		classifier = LinearMulticlassMachine(strategy, fea_train, base_classifier, gnd_train)
		classifier.train()
		label_pred = classifier.apply(fea_test)
		if gnd_test is not None:
		    evaluator = MulticlassAccuracy()
		    acc = evaluator.evaluate(label_pred, gnd_test)
		else:
		    acc = None

		return (classifier.get_num_machines(), acc)


	for ier in range(len(encoders)):
		for idr in range(len(decoders)):
		    t_begin = time.time()
		    (codelen, acc) = run_ecoc(ier, idr)
		    if acc is None:
		        acc_fmt = 's'
		        acc = 'N/A'
		    else:
		        acc_fmt = '.4f'

		    t_elapse = time.time() - t_begin
		    #print((format_str % ('d', '.3f', acc_fmt)) %
		    #      (encoders[ier][4:-7], decoders[idr][4:-7], codelen, t_elapse, acc))
def classifier_perceptron_graphical(n=100, distance=5, learn_rate=1., max_iter=1000, num_threads=1, seed=None, nperceptrons=5):
	import numpy as np
	import matplotlib.pyplot as plt

	from shogun.Features import RealFeatures, BinaryLabels
	from shogun.Classifier import Perceptron, LibLinear, L2R_L2LOSS_SVC
	from modshogun import MSG_INFO

	# 2D data
	_DIM = 2

	# To get the nice message that the perceptron has converged
	dummy = BinaryLabels()
#	dummy.io.set_loglevel(MSG_INFO)

	np.random.seed(seed)

	# Produce some (probably) linearly separable training data by hand
	# Two Gaussians at a far enough distance
	X = np.array(np.random.randn(_DIM,n))+distance
	Y = np.array(np.random.randn(_DIM,n))
	label_train_twoclass = np.hstack((np.ones(n), -np.ones(n)))

	fm_train_real = np.hstack((X,Y))
	feats_train = RealFeatures(fm_train_real)
	labels = BinaryLabels(label_train_twoclass)

	perceptron = Perceptron(feats_train, labels)
	perceptron.set_learn_rate(learn_rate)
	perceptron.set_max_iter(max_iter)
	perceptron.set_initialize_hyperplane(False)

	# Find limits for visualization
	x_min = min(np.min(X[0,:]), np.min(Y[0,:]))
	x_max = max(np.max(X[0,:]), np.max(Y[0,:]))

	y_min = min(np.min(X[1,:]), np.min(Y[1,:]))
	y_max = max(np.max(X[1,:]), np.max(Y[1,:]))

	fig1, axes1 = plt.subplots(1,1)
	fig2, axes2 = plt.subplots(1,1)

	for i in range(nperceptrons):
		# Initialize randomly weight vector and bias
		perceptron.set_w(np.random.random(2))
		perceptron.set_bias(np.random.random())

		# Run the perceptron algorithm
		perceptron.train()

		# Construct the hyperplane for visualization
		# Equation of the decision boundary is w^T x + b = 0
		b = perceptron.get_bias()
		w = perceptron.get_w()

		# decision boundary w[0]*x + w[1]*y + b = 0, i.e. y = -(w[0]*x + b)/w[1]
		hx = np.linspace(x_min-1, x_max+1)

		axes1.plot(hx, -1/w[1]*(w[0]*hx+b))
		axes2.plot(hx, -1/w[1]*(w[0]*hx+b), alpha=0.5)

		# min_distance is a helper defined elsewhere (see the sketch after this example)
		print('minimum distance with perceptron is %f' % min_distance(w, b, feats_train))

	C = 1
	epsilon = 1e-3
	svm = LibLinear(C, feats_train, labels)
	svm.set_liblinear_solver_type(L2R_L2LOSS_SVC)
	svm.set_epsilon(epsilon)
	svm.set_bias_enabled(True)
	svm.train()

	b = svm.get_bias()
	w = svm.get_w()

	print('minimum distance with svm is        %f' % min_distance(w, b, feats_train))

	hx = np.linspace(x_min-1, x_max+1)

	axes2.plot(hx, -1/w[1]*(w[0]*hx+b), 'k', linewidth=2.0)

	# Plot the two-class data
	axes1.scatter(X[0,:], X[1,:], s=40, marker='o', facecolors='none', edgecolors='b')
	axes1.scatter(Y[0,:], Y[1,:], s=40, marker='s', facecolors='none', edgecolors='r')

	axes2.scatter(X[0,:], X[1,:], s=40, marker='o', facecolors='none', edgecolors='b')
	axes2.scatter(Y[0,:], Y[1,:], s=40, marker='s', facecolors='none', edgecolors='r')

	# Customize the plot
	axes1.axis([x_min-1, x_max+1, y_min-1, y_max+1])
	axes1.set_title('Rosenblatt\'s Perceptron Algorithm')
	axes1.set_xlabel('x')
	axes1.set_ylabel('y')

	axes2.axis([x_min-1, x_max+1, y_min-1, y_max+1])
	axes2.set_title('Support Vector Machine')
	axes2.set_xlabel('x')
	axes2.set_ylabel('y')

	plt.show()

	return perceptron
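
The example calls a min_distance helper that is not shown in this listing. A plausible reconstruction (an assumption, not the original helper): the smallest geometric margin |w.x + b| / ||w|| over the training points.

import numpy as np

def min_distance(w, b, feats):
    # feats is a RealFeatures object; columns are examples
    X = feats.get_feature_matrix()
    return np.min(np.abs(w.dot(X) + b)) / np.linalg.norm(w)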
Example #20
def classifier_non_separable_svm(n=100, m=10, distance=5, seed=None):
    '''
    n is the number of examples per class; m is the number of examples per
    class whose label gets swapped to make the data non-linearly separable.
    '''
    import numpy as np
    import matplotlib.pyplot as plt

    from shogun.Features import RealFeatures, BinaryLabels
    from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC

    # 2D data
    _DIM = 2


    np.random.seed(seed)

    # Produce some (probably) linearly separable training data by hand
    # Two Gaussians at a far enough distance
    X = np.array(np.random.randn(_DIM, n)) + distance
    Y = np.array(np.random.randn(_DIM, n))
    # The last m points of each class get their labels swapped to make the
    # data non-linearly separable
    label_train_twoclass = np.hstack(
        (np.ones(n - m), -np.ones(m), -np.ones(n - m), np.ones(m)))

    fm_train_real = np.hstack((X, Y))
    feats_train = RealFeatures(fm_train_real)
    labels = BinaryLabels(label_train_twoclass)

    # Train linear SVM
    C = 1
    epsilon = 1e-3
    svm = LibLinear(C, feats_train, labels)
    svm.set_liblinear_solver_type(L2R_L2LOSS_SVC)
    svm.set_epsilon(epsilon)
    svm.set_bias_enabled(True)
    svm.train()

    # Get hyperplane parameters
    b = svm.get_bias()
    w = svm.get_w()

    # Find limits for visualization
    x_min = min(np.min(X[0, :]), np.min(Y[0, :]))
    x_max = max(np.max(X[0, :]), np.max(Y[0, :]))

    y_min = min(np.min(X[1, :]), np.min(Y[1, :]))
    y_max = max(np.max(X[1, :]), np.max(Y[1, :]))

    hx = np.linspace(x_min - 1, x_max + 1)
    hy = -w[1] / w[0] * hx

    plt.plot(hx, -1 / w[1] * (w[0] * hx + b), 'k', linewidth=2.0)

    # Plot the two-class data
    pos_idxs = label_train_twoclass == +1
    plt.scatter(fm_train_real[0, pos_idxs],
                fm_train_real[1, pos_idxs],
                s=40,
                marker='o',
                facecolors='none',
                edgecolors='b')

    neg_idxs = label_train_twoclass == -1
    plt.scatter(fm_train_real[0, neg_idxs],
                fm_train_real[1, neg_idxs],
                s=40,
                marker='s',
                facecolors='none',
                edgecolors='r')

    # Customize the plot
    plt.axis([x_min - 1, x_max + 1, y_min - 1, y_max + 1])
    plt.title('SVM with non-linearly separable data')
    plt.xlabel('x')
    plt.ylabel('y')

    plt.show()

    return svm
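
An example invocation (it opens a matplotlib window; the seed is fixed for reproducibility):

svm = classifier_non_separable_svm(n=100, m=10, distance=5, seed=42)
print('learned bias: %f' % svm.get_bias())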