示例#1
0
    def _run_one_size(self, algo, n_samples, n_features, param_overrides=None, dataset_param_overrides=None):
        """Benchmark one problem size with stock vs. daal4py-patched sklearn.

        Times ``algo`` on a generated dataset twice — once with stock
        scikit-learn and once after ``d4p.patch_sklearn()`` — and reports
        the speedup of the patched run.

        Parameters
        ----------
        algo : object
            Benchmark wrapper exposing ``reload()``, ``run(data, **kw)``
            and ``name`` (exact contract defined elsewhere in the project).
        n_samples, n_features : int
            Dataset dimensions forwarded to ``gen_data``.
        param_overrides : dict, optional
            Extra keyword arguments forwarded to ``algo.run``.
        dataset_param_overrides : dict, optional
            Extra keyword arguments forwarded to ``gen_data``.

        Returns
        -------
        dict
            Timings (``skl_time``, ``d4p_time``), ``speedup``, the problem
            size, and all override key/values merged in.
        """
        # Fix: do not use mutable default arguments ({}); the same dict
        # object would be shared across every call of this method.
        if param_overrides is None:
            param_overrides = {}
        if dataset_param_overrides is None:
            dataset_param_overrides = {}

        data = gen_data(self.dataset_name, n_samples, n_features, **dataset_param_overrides)

        # Stock scikit-learn: best (minimum) time over n_reps repetitions.
        algo.reload()
        skl_timer = BenchmarkTimer(self.n_reps)
        for rep in skl_timer.benchmark_runs():
            algo.run(data, **param_overrides)
        skl_elapsed = np.min(skl_timer.timings)

        # daal4py-patched scikit-learn: reload so the patched classes are
        # picked up, then time the same workload and unpatch afterwards.
        d4p.patch_sklearn()
        algo.reload()
        d4p_timer = BenchmarkTimer(self.n_reps)
        for rep in d4p_timer.benchmark_runs():
            algo.run(data, **param_overrides)
        d4p_elapsed = np.min(d4p_timer.timings)
        d4p.unpatch_sklearn()

        speedup = skl_elapsed / d4p_elapsed

        print(f"{algo.name} (n_samples={n_samples}, n_features={n_features})"
              f" [skl={skl_elapsed}, d4p={d4p_elapsed} speedup={speedup}]")

        return dict(
            skl_time=skl_elapsed,
            d4p_time=d4p_elapsed,
            speedup=speedup,
            n_samples=n_samples,
            n_features=n_features,
            **param_overrides,
            **dataset_param_overrides
        )
示例#2
0
def parse_args(parser,
               size=None,
               loop_types=(),
               n_jobs_supported=True,
               prefix='sklearn'):
    '''
    Add common arguments useful for most benchmarks and parse.

    Parameters
    ----------
    parser : argparse.ArgumentParser
        Parser to which the arguments should be added.
    size : tuple of int, optional
        Enable '--size' argument with this default size.
        If None (default), no '--size' argument will be added.
    loop_types : iterable of str, optional
        Add arguments like '--fit-inner-loops' and '--fit-outer-loops',
        useful for tweaking runtime of the benchmark.
    n_jobs_supported : bool
        If set to True, generate a n_jobs member in the argparse Namespace
        corresponding to the optimal n_jobs parameter for scikit-learn.
        Otherwise, n_jobs will be set to None.
    prefix : str, optional, default 'sklearn'
        The default prefix to report

    Returns
    -------
    parser : argparse.ArgumentParser
        Parser to which the arguments were added.
        This is the same parser that was passed to this function.
    '''
    # NOTE(review): loop_types is accepted but not used in this body —
    # confirm whether per-loop-type arguments should be added here.

    parser.add_argument('-n',
                        '--num-threads',
                        '--core-number',
                        default=-1,
                        dest='threads',
                        type=int,
                        help='Number of threads to use')
    parser.add_argument('-a',
                        '--arch',
                        default='?',
                        help='Machine architecture, for bookkeeping')
    parser.add_argument('-b',
                        '--batch',
                        '--batchID',
                        default='?',
                        help='Batch ID, for bookkeeping')
    parser.add_argument('-p',
                        '--prefix',
                        default=prefix,
                        help='Prefix string, for bookkeeping')
    parser.add_argument('-v',
                        '--verbose',
                        default=False,
                        action='store_true',
                        help='Output extra debug messages')
    parser.add_argument('--data-format',
                        type=str,
                        default='numpy',
                        choices=('numpy', 'pandas', 'cudf'),
                        help='Data format: numpy (default), pandas, cudf')
    parser.add_argument('--data-order',
                        type=str,
                        default='C',
                        choices=('C', 'F'),
                        # Fix: missing separating space between the two
                        # implicitly-concatenated help fragments.
                        help='Data order: C (row-major, default) or '
                        'F (column-major)')
    parser.add_argument('-d',
                        '--dtype',
                        type=np.dtype,
                        default=np.float64,
                        choices=(np.float32, np.float64),
                        help='Data type: float64 (default) or float32')
    parser.add_argument('--check-finiteness',
                        default=False,
                        action='store_true',
                        help='Check finiteness in sklearn input check '
                        '(disabled by default)')
    parser.add_argument('--output-format',
                        type=str,
                        default='json',
                        # Fix: ('json') is just the string 'json', so argparse
                        # would accept any of the characters j/s/o/n as a
                        # value. A one-element tuple needs a trailing comma.
                        choices=('json',),
                        help='Output format: json')
    parser.add_argument('--time-method',
                        type=str,
                        default='box_filter',
                        # Fix: same one-element-tuple bug as above.
                        choices=('box_filter',),
                        help='Method used for time measurements')
    parser.add_argument('--box-filter-measurements',
                        type=int,
                        default=100,
                        help='Maximum number of measurements in box filter')
    parser.add_argument('--inner-loops',
                        default=100,
                        type=int,
                        help='Maximum inner loop iterations '
                        '(we take the mean over inner iterations)')
    parser.add_argument('--outer-loops',
                        default=100,
                        type=int,
                        help='Maximum outer loop iterations '
                        '(we take the min over outer iterations)')
    parser.add_argument('--time-limit',
                        default=10.,
                        type=float,
                        help='Target time to spend to benchmark')
    parser.add_argument('--goal-outer-loops',
                        default=10,
                        type=int,
                        dest='goal',
                        help='Number of outer loops to aim '
                        'while automatically picking number of '
                        'inner loops. If zero, do not automatically '
                        'decide number of inner loops.')
    parser.add_argument('--seed',
                        type=int,
                        default=12345,
                        help='Seed to pass as random_state')
    parser.add_argument('--dataset-name',
                        type=str,
                        default=None,
                        help='Dataset name')
    # Fix: removed the stray trailing comma that turned this statement into
    # a throwaway tuple expression; also fixed the 'avalible' typo.
    parser.add_argument('--no-intel-optimized',
                        default=False,
                        action='store_true',
                        help='Use no intel optimized version. '
                        'Now available for scikit-learn benchmarks')
    parser.add_argument('--device',
                        default='None',
                        type=str,
                        choices=('host', 'cpu', 'gpu', 'None'),
                        help='Execution context device')

    # Optional NPY input files for train/test features and labels.
    for data in ['X', 'y']:
        for stage in ['train', 'test']:
            parser.add_argument(f'--file-{data}-{stage}',
                                type=argparse.FileType('r'),
                                help=f'Input file with {data}_{stage}, '
                                'in NPY format')

    if size is not None:
        parser.add_argument('-s',
                            '--size',
                            default=size,
                            type=_parse_size,
                            dest='shape',
                            help='Problem size, delimited by "x" or ","')

    params = parser.parse_args()

    # Patch scikit-learn with daal4py unless explicitly disabled; fall back
    # to stock scikit-learn (and no device context) when daal4py is missing.
    if not params.no_intel_optimized:
        try:
            from daal4py.sklearn import patch_sklearn
            patch_sklearn()
        except ImportError:
            print(
                'Failed to import daal4py.sklearn.patch_sklearn. '
                'Using stock version of scikit-learn',
                file=sys.stderr)
            params.device = 'None'
    else:
        if params.device != 'None':
            print(
                'Device context is not supported for stock scikit-learn. '
                'Please use --no-intel-optimized=False with '
                f'--device={params.device} parameter. Fallback to --device=None.',
                file=sys.stderr)
            params.device = 'None'

    # disable finiteness check (default)
    if not params.check_finiteness:
        sklearn_disable_finiteness_check()

    # Ask DAAL what it thinks about this number of threads
    num_threads = prepare_daal_threads(num_threads=params.threads)
    if params.verbose:
        print(f'@ DAAL gave us {num_threads} threads')

    n_jobs = None
    if n_jobs_supported:
        # NOTE(review): this intentionally overrides the DAAL-reported
        # thread count with the user-requested value — confirm intended.
        n_jobs = num_threads = params.threads

    # Set threading and DAAL related params here
    setattr(params, 'threads', num_threads)
    setattr(params, 'n_jobs', n_jobs)

    # Set size string parameter for easy printing
    if size is not None:
        setattr(params, 'size', size_str(params.shape))

    # Very verbose output
    if params.verbose:
        print(f'@ params = {params.__dict__}')

    return params
示例#3
0
import numpy as np
from daal4py.sklearn import patch_sklearn
patch_sklearn()
from sklearn.linear_model import LinearRegression
from timeit import default_timer as timer
from sklearn.metrics import mean_squared_error
import pandas as pd
import common

NUM_LOOPS = 100
print("Computing for Linear Regression with Daal Patch")


def run_inference(num_observations: int = 1000):
    """Run xgboost for specified number of observations"""
    # Load data
    train_x_df = common.get_test_data_df(X=common.X_df, size=num_observations)
    train_y = common.get_test_data_y(size=num_observations)
    num_rows = len(train_x_df)
    ######################
    print("_______________________________________")
    print("Total Number of Rows", num_rows)
    run_times = []
    inference_times = []
    for _ in range(NUM_LOOPS):

        start_time = timer()
        reg = LinearRegression().fit(train_x_df, train_y)
        #predictor.compute(data, MODEL)
        end_time = timer()
示例#4
0
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import os
import time
import modin.pandas as pd
from modin.experimental.engines.omnisci_on_ray.frame.omnisci_worker import OmnisciServer

from sklearn import config_context
import daal4py.sklearn as sklearn

sklearn.patch_sklearn()
from sklearn.model_selection import train_test_split
import sklearn.linear_model as lm
import numpy as np


def read():
    columns_names = [
        "YEAR0",
        "DATANUM",
        "SERIAL",
        "CBSERIAL",
        "HHWT",
        "CPI99",
        "GQ",
        "QGQ",