# Example #1
0
def classifier_svmocas_modular(train_fname=traindat,
                               test_fname=testdat,
                               label_fname=label_traindat,
                               C=0.9,
                               epsilon=1e-5,
                               num_threads=1):
    """Train an SVMOcas machine on CSV data and apply it to a test set.

    Args:
        train_fname: Path handed to ``CSVFile`` to build the training
            ``RealFeatures``.
        test_fname: Path handed to ``CSVFile`` to build the test
            ``RealFeatures``.
        label_fname: Path handed to ``CSVFile`` to build the training
            ``BinaryLabels``.
        C: First constructor argument of ``SVMOcas`` (presumably the
            regularization constant -- confirm against the Shogun docs).
        epsilon: Value forwarded to ``svm.set_epsilon``.
        num_threads: Value forwarded to ``svm.parallel.set_num_threads``.

    Returns:
        A tuple ``(predictions, svm, predictions.get_labels())``, or ``None``
        when ``SVMOcas`` cannot be imported from ``modshogun``.
    """
    from modshogun import BinaryLabels, CSVFile, RealFeatures

    # SVMOcas is treated as optional: report and bail out if it is missing.
    try:
        from modshogun import SVMOcas
    except ImportError:
        print("SVMOcas not available")
        return None

    # Load inputs in the same order as before: train, test, then labels.
    train_features = RealFeatures(CSVFile(train_fname))
    test_features = RealFeatures(CSVFile(test_fname))
    train_labels = BinaryLabels(CSVFile(label_fname))

    classifier = SVMOcas(C, train_features, train_labels)
    classifier.set_epsilon(epsilon)
    classifier.parallel.set_num_threads(num_threads)
    classifier.set_bias_enabled(False)
    classifier.train()

    # The results of these two getters are not used below; the calls are
    # kept so the function still exercises them after training.
    learned_bias = classifier.get_bias()
    learned_w = classifier.get_w()

    predictions = classifier.apply(test_features)
    predicted_labels = predictions.get_labels()
    return predictions, classifier, predicted_labels
if __name__ == '__main__':
    from modshogun import SparseRealFeatures, RandomFourierDotFeatures, GAUSSIAN
    from modshogun import LibSVMFile, BinaryLabels, SVMOcas
    from modshogun import Time
    from numpy import array

    args = parse_arguments()

    print 'Loading training data...'
    sparse_data = load_sparse_data(args.dataset, args.dimension)

    kernel_params = array([args.width], dtype=float)
    rf_feats = RandomFourierDotFeatures(sparse_data['data'], args.D, GAUSSIAN,
                                        kernel_params)

    svm = SVMOcas(args.C, rf_feats, sparse_data['labels'])
    svm.set_epsilon(args.epsilon)
    print 'Starting training.'
    timer = Time()
    svm.train()
    timer.stop()
    print 'Training completed, took {0:.2f}s.'.format(timer.time_diff_sec())

    predicted_labels = svm.apply()
    evaluate(predicted_labels, sparse_data['labels'], 'Training results')

    if args.testset != None:
        random_coef = rf_feats.get_random_coefficients()
        # removing current dataset from memory in order to load the test dataset,
        # to avoid running out of memory
        rf_feats = None