# Option name inferred from the `params.data_multiplier` reference below
parser.add_argument('--data-multiplier', default=100, type=int,
                    help='Data multiplier')
parser.add_argument('--maxiter', type=int, default=100,
                    help='Maximum number of iterations')
params = parse_args(parser, loop_types=('fit', 'predict'), prefix='daal4py')

# Load generated data
X = np.load(params.filex)
X_init = np.load(params.filei)
X_mult = np.vstack((X,) * params.data_multiplier)
tol = np.load(params.filet)

params.size = size_str(X.shape)
params.n_clusters = X_init.shape[0]
params.dtype = X.dtype


# Define functions to time
def test_fit(X, X_init):
    algorithm = kmeans(fptype=getFPType(X),
                       nClusters=params.n_clusters,
                       maxIterations=params.maxiter,
                       assignFlag=True,
                       accuracyThreshold=tol)
    return algorithm.compute(X, X_init)


def test_predict(X, X_init):
    # Assumption: prediction is an assignment-only pass, i.e. zero Lloyd
    # iterations with cluster labels computed for every sample
    algorithm = kmeans(fptype=getFPType(X),
                       nClusters=params.n_clusters,
                       maxIterations=0,
                       assignFlag=True,
                       accuracyThreshold=0.0)
    return algorithm.compute(X, X_init)
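

def _example_inspect_result():
    # Hypothetical helper (not part of the original benchmark): shows how the
    # daal4py kmeans result returned by test_fit/test_predict is consumed.
    # Attribute names (centroids, assignments, nIterations) follow daal4py's
    # kmeans result object.
    res = test_fit(X_mult, X_init)
    print('centroids shape:', res.centroids.shape)    # (n_clusters, n_features)
    print('labels:', res.assignments.ravel()[:10])    # cluster index per sample
    print('iterations run:', int(res.nIterations[0, 0]))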
def main():
    parser = argparse.ArgumentParser(description='daal4py SVC benchmark with '
                                                 'linear kernel')
    parser.add_argument('-x', '--filex', '--fileX',
                        type=argparse.FileType('r'), required=True,
                        help='Input file with features, in NPY format')
    parser.add_argument('-y', '--filey', '--fileY',
                        type=argparse.FileType('r'), required=True,
                        help='Input file with labels, in NPY format')
    parser.add_argument('-C', dest='C', type=float, default=0.01,
                        help='SVM slack parameter')
    parser.add_argument('--kernel', choices=('linear',), default='linear',
                        help='SVM kernel function')
    parser.add_argument('--maxiter', type=int, default=2000,
                        help='Maximum iterations for the iterative solver. '
                             '-1 means no limit.')
    parser.add_argument('--max-cache-size', type=int, default=64,
                        help='Maximum cache size, in gigabytes, for SVM.')
    parser.add_argument('--tau', type=float, default=1e-12,
                        help='Tau parameter for working set selection scheme')
    parser.add_argument('--tol', type=float, default=1e-16,
                        help='Tolerance')
    parser.add_argument('--no-shrinking', action='store_false', default=True,
                        dest='shrinking', help="Don't use shrinking heuristic")
    params = parse_args(parser, loop_types=('fit', 'predict'),
                        prefix='daal4py')

    # Load data and cast to float64
    X_train = np.load(params.filex.name).astype('f8')
    y_train = np.load(params.filey.name).astype('f8')

    cache_size_bytes = get_optimal_cache_size(X_train.shape[0],
                                              max_cache=params.max_cache_size)
    params.cache_size_mb = cache_size_bytes / 2**20
    params.cache_size_bytes = cache_size_bytes
    params.n_classes = np.unique(y_train).size

    # daal4py expects binary labels in {-1, 1}, shaped as a column vector
    y_train[y_train == 0] = -1
    y_train = y_train[:, np.newaxis]

    columns = ('batch', 'arch', 'prefix', 'function', 'threads', 'dtype',
               'size', 'kernel', 'cache_size_mb', 'C', 'sv_len', 'n_classes',
               'accuracy', 'time')
    params.size = size_str(X_train.shape)
    params.dtype = X_train.dtype
    print_header(columns, params)

    # Time fit and predict
    fit_time, res = time_mean_min(test_fit, X_train, y_train, params,
                                  outer_loops=params.fit_outer_loops,
                                  inner_loops=params.fit_inner_loops)
    res, support, indices, n_support = res
    params.sv_len = support.shape[0]
    print_row(columns, params, function='SVM.fit', time=fit_time)

    predict_time, yp = time_mean_min(test_predict, X_train, res, params,
                                     outer_loops=params.predict_outer_loops,
                                     inner_loops=params.predict_inner_loops)
    print_row(columns, params, function='SVM.predict', time=predict_time,
              accuracy=f'{100*accuracy_score(yp, y_train):.3}')
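

# test_fit and test_predict are referenced in main() but fall outside this
# fragment. A minimal sketch, assuming daal4py's svm_training/svm_prediction
# classes with a linear kernel; the keyword arguments and model properties
# used below (cacheSize, doShrinking, SupportVectors, SupportIndices) follow
# the daal4py SVM API, and the four-tuple returned by test_fit is an
# assumption chosen to mirror the unpacking in main().
from daal4py import svm_training, svm_prediction, kernel_function_linear


def test_fit(X, y, params):
    fptype = getFPType(X)
    kf = kernel_function_linear(fptype=fptype)
    algorithm = svm_training(fptype=fptype,
                             C=params.C,
                             maxIterations=params.maxiter,
                             tau=params.tau,
                             cacheSize=params.cache_size_bytes,
                             accuracyThreshold=params.tol,
                             doShrinking=params.shrinking,
                             kernel=kf)
    result = algorithm.compute(X, y)
    support = result.model.SupportVectors
    indices = result.model.SupportIndices
    n_support = support.shape[0]
    return result, support, indices, n_support


def test_predict(X, training_result, params):
    fptype = getFPType(X)
    kf = kernel_function_linear(fptype=fptype)
    algorithm = svm_prediction(fptype=fptype, kernel=kf)
    prediction = algorithm.compute(X, training_result.model)
    # Decision values are signed distances; map them back to {-1, 1} labels
    return np.where(prediction.prediction.ravel() >= 0, 1., -1.)


if __name__ == '__main__':
    main()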