示例#1
0
def train(classifier_name, classifier_extra, label_values):
    """Train the classifier selected by ``classifier_name`` on labeled data.

    Before dispatching, a ``hadoopy`` counter in the 'FeatureShape' group is
    bumped with the length of the first value, so this happens even when the
    classifier name turns out to be unknown.

    Args:
        classifier_name: One of 'svmlinear', 'svm', 'svm_hik',
            'svmlinear_autotune' or 'plslinearsvmxval'.
        classifier_extra: Not used by this function.
        label_values: Iterable of (label, value) pairs.  It is materialized
            into a list up front, so a one-shot iterator is acceptable.  It
            must be non-empty and the first value must support ``len()``.

    Returns:
        For the classipy-backed names, whatever the classifier's ``train()``
        call returns.  For 'plslinearsvmxval', the fitted sklearn Pipeline.

    Raises:
        ValueError: If ``classifier_name`` is not one of the known names.
        IndexError: If ``label_values`` is empty.
    """
    import classipy
    label_values = list(label_values)
    hadoopy.counter('FeatureShape', str(len(label_values[0][1])))
    if classifier_name == 'svmlinear':
        return classipy.SVMLinear(options={'B': '1'}).train(label_values)
    elif classifier_name == 'svm':
        return classipy.SVM(options={'t': '2'}).train(label_values)
    elif classifier_name == 'svm_hik':
        return classipy.SVMScikit(kernel=classipy.kernels.histogram_intersection).train(label_values)
    elif classifier_name == 'svmlinear_autotune':

        def wrapped_optimizer(*args, **kw):
            # Same grid as pyram.exponential_grid, but bump a counter for
            # every candidate that gets pulled from the generator.
            for x in pyram.exponential_grid(*args, **kw):
                hadoopy.counter('X-Val', 'Rounds')
                yield x
        # Element [1] of the select_parameters result is what gets handed to
        # the SVMLinear constructor below (presumably the chosen options;
        # confirm against classipy).
        b = classipy.select_parameters(classipy.SVMLinear, label_values,
                                       {'c': (10**-2, 10**1, 10)},
                                       wrapped_optimizer,
                                       options={'B': '1'})[1]
        print(b)
        return classipy.SVMLinear(b).train(label_values)
    elif classifier_name == 'plslinearsvmxval':
        import random
        import sys
        # NOTE: despite the name, the PLS stage and the cross-validated grid
        # search over svm__C are currently disabled; only a single SVC with
        # the histogram-intersection kernel is fitted.
        p = sklearn.pipeline.Pipeline([('svm', sklearn.svm.SVC(kernel=classipy.kernels.histogram_intersection, scale_C=True))])
        # Cap on how many examples of each class are kept for fitting.
        max_per_class = 100
        num_neg = 0
        num_pos = 0
        # Shuffle first so the capped subset is a random sample.
        random.shuffle(label_values)
        new_label_values = []
        for l, v in label_values:
            # Label 1 is positive; every other label counts as negative.
            # The counters keep running past the cap so the log line below
            # reports the totals seen, not the number kept.
            if l == 1:
                if num_pos < max_per_class:
                    new_label_values.append((l, v))
                num_pos += 1
            else:
                if num_neg < max_per_class:
                    new_label_values.append((l, v))
                num_neg += 1
        sys.stderr.write('Num Neg[%d] Pos[%d]\n' % (num_neg, num_pos))
        # Split the pairs into parallel sequences.  Unpacking works on both
        # Python 2 and 3, whereas slicing the result of zip() raises
        # TypeError on Python 3 because zip() returns an iterator there.
        labels, values = zip(*new_label_values)
        p.fit(values, labels)
        return p
    else:
        raise ValueError('Unknown classifier [%s]' % classifier_name)
示例#2
0
import numpy as np
import pyram
import classipy

# Synthetic two-class data: 1000 samples labeled -1 drawn around (0, 0) with
# identity covariance, then 1000 samples labeled +1 drawn around (3, 3) with
# covariance 2*I.  The draws happen in this order.
negatives = np.random.multivariate_normal([0, 0], np.eye(2), 1000)
positives = np.random.multivariate_normal([3, 3], np.eye(2) * 2, 1000)
a = [(-1, x) for x in negatives] + [(1, x) for x in positives]

# Search ranges for the 'c' and 'g' options, passed through to
# pyram.exponential_grid (presumably (low, high, steps); confirm against
# pyram).
param_ranges = {
    'c': (10**-1, 10**2, 10),
    'g': (10**-1, 10**5, 10),
}
b = classipy.select_parameters(
    classipy.SVM,
    a,
    param_ranges,
    pyram.exponential_grid,
    options={'t': '2'},
)
print(b)
示例#3
0
import numpy as np
import pyram
import classipy

# Build a labeled sample list from two 2-D Gaussians, 1000 points each:
# label -1 centered at (0, 0) with identity covariance, then label +1
# centered at (3, 3) with covariance 2*I.
a = []
for label, mean, cov in ((-1, [0, 0], np.eye(2)), (1, [3, 3], np.eye(2) * 2)):
    a += [(label, x) for x in np.random.multivariate_normal(mean, cov, 1000)]

# Tune the SVM's 'c' and 'g' options over the ranges below, using
# pyram.exponential_grid as the candidate generator, and print the result.
search_space = {'c': (10**-1, 10**2, 10), 'g': (10**-1, 10**5, 10)}
fixed_options = {'t': '2'}
b = classipy.select_parameters(classipy.SVM, a, search_space,
                               pyram.exponential_grid, options=fixed_options)
print(b)