Пример #1
0
def main():
    """Train a ridge regression model in batch mode and predict on test data.

    Columns 0-9 of each CSV are read as the independent variables and
    columns 10-11 as the dependent variables.

    Returns:
        A tuple ``(predict_result, ptdata)`` holding the prediction result
        object and the dependent variables read from the test file.
    """
    train_path = "./data/batch/linear_regression_train.csv"
    test_path = "./data/batch/linear_regression_test.csv"

    # Training algorithm, configured to fit an intercept term
    trainer = d4p.ridge_regression_training(interceptFlag=True)

    # Training set: 10 feature columns followed by 2 response columns
    train_features = read_csv(train_path, range(10))
    train_responses = read_csv(train_path, range(10, 12))
    # The training result carries the model used for prediction below
    fitted = trainer.compute(train_features, train_responses)

    # Prediction algorithm
    predictor = d4p.ridge_regression_prediction()
    # Test set uses the same column layout as the training set
    test_features = read_csv(test_path, range(10))
    test_responses = read_csv(test_path, range(10, 12))
    prediction = predictor.compute(test_features, fitted.model)

    # Sanity check: one predicted row per test observation,
    # one predicted column per dependent variable
    actual_shape = prediction.prediction.shape
    expected_shape = (test_features.shape[0], train_responses.shape[1])
    assert actual_shape == expected_shape

    return (prediction, test_responses)
Пример #2
0
def _daal4py_fit(self, X, y_):
    """Fit the estimator with daal4py's ridge regression training.

    Stores the trained model on ``self.daal_model_`` and splits its ``Beta``
    table into ``self.intercept_`` (first column) and ``self.coef_``
    (remaining columns).

    Args:
        X: training features; converted with ``make2d`` before training.
        y_: training targets; converted with ``make2d`` before training.
            Its ``ndim`` decides whether the fitted attributes are flattened.

    Returns:
        ``self``.

    Raises:
        ValueError: if ``self.alpha`` has neither a single element nor one
            element per column of the 2-D targets.
    """
    features = make2d(X)
    targets = make2d(y_)

    fp_type = getFPType(features)

    # Accept either one penalty overall or one penalty per target column
    alphas = np.asarray(self.alpha, dtype=features.dtype)
    if not (alphas.size == 1 or alphas.size == targets.shape[1]):
        raise ValueError("alpha length is wrong")
    alphas = alphas.reshape((1, -1))

    # Only a literal True enables the intercept
    use_intercept = self.fit_intercept is True
    trainer = daal4py.ridge_regression_training(
        fptype=fp_type,
        method='defaultDense',
        interceptFlag=use_intercept,
        ridgeParameters=alphas)
    training_result = trainer.compute(features, targets)

    model = training_result.model
    self.daal_model_ = model
    beta = model.Beta

    # First Beta column is stored as the intercept, the rest as coefficients
    self.intercept_ = beta[:, 0].copy(order='C')
    self.coef_ = beta[:, 1:].copy(order='C')

    # A single coefficient row fitted from 1-D targets is flattened:
    # 1-D coef_ and a scalar intercept_
    flatten = self.coef_.shape[0] == 1 and y_.ndim == 1
    if flatten:
        self.coef_ = np.ravel(self.coef_)
        self.intercept_ = self.intercept_[0]

    return self
Пример #3
0
    def ridgeRegression(self, X_train, X_test, y_train, y_test, target):
        '''
        Train and evaluate Ridge Regression in batch (serial) mode.

        The training time is recorded in ``self.latency`` and the MSE and
        R2 score on the test data are recorded in ``self.metrics``.

        X_train, y_train -- training features and targets
        X_test, y_test   -- test features and targets used for the metrics
        target           -- not referenced by this method
        '''

        # Training algorithm with an intercept term
        trainer = d4p.ridge_regression_training(interceptFlag=True)
        self.logger.info('Training the Ridge Regression in pydaal Batch/Serial Mode')

        # Only the training call is timed
        t_begin = time.time()
        fitted = trainer.compute(X_train, y_train)
        self.latency["Serial Ridge Regression Batch Time"] = time.time() - t_begin

        # Predict on the test features with the model obtained above
        predictor = d4p.ridge_regression_prediction()
        prediction_result = predictor.compute(X_test, fitted.model)
        y_pred = prediction_result.prediction

        self.logger.info('Completed Ridge Regression in pydaal Batch/Serial Mode')

        # Both metrics are computed before either one is stored
        mse_value = mean_squared_error(y_test, y_pred)
        r2_value = r2_score(y_test, y_pred)

        self.metrics["MSE For Serial Ridge regression Batch"] = mse_value
        self.metrics["R2 Score For Serial Ridge regression Batch"] = r2_value
Пример #4
0
    def ridgeRegression(self, Data_Path, test_data_path, target, n):
        '''
        daal4py Ridge Regression SPMD Mode

        Every process trains on its own shard, read from
        ``Data_Path + str(rank + 1) + ".csv"``. Only process 0 runs the
        prediction, so only process 0 records the MSE and R2 score in
        ``self.metrics``; the training time is recorded in ``self.latency``
        on every process.

        Data_Path      -- path prefix of the per-process training CSV files
        test_data_path -- path of the test CSV file
        target         -- name of the target column in both CSV files
        n              -- passed to ``d4p.daalinit`` as ``nthreads``
        '''

        # Initialize SPMD mode
        d4p.daalinit(nthreads=n)

        # Query the rank once, while the distributed runtime is initialized
        rank = d4p.my_procid()

        shard_path = Data_Path + str(rank + 1) + ".csv"

        # training
        data = pd.read_csv(shard_path)
        X = data.drop(columns=target)
        y = data[target]

        # test file setup
        test = pd.read_csv(test_data_path)
        y_test = test[target]
        X_test = test.drop(target, axis=1)

        # Configure a Ridge regression training object
        train_algo = d4p.ridge_regression_training(distributed=True,
                                                   interceptFlag=True)
        self.logger.info('Training the Ridge Regression in pydaal SPMD Mode')

        start_time = time.time()

        train_result = train_algo.compute(X, y)

        self.latency["Parallel Ridge Regression SPMD Time"] = time.time() - \
            start_time

        # Only process #0 predicts; the other processes keep None here
        predict_result = None
        if rank == 0:
            predict_algo = d4p.ridge_regression_prediction()
            # now predict using the model from the training above
            predict_result = predict_algo.compute(X_test, train_result.model)

        self.logger.info('Completed Ridge Regression in pydaal SPMD Mode')
        d4p.daalfini()

        # Processes without a prediction have nothing to score. Computing the
        # metrics unconditionally used to fail on them because
        # predict_result was never bound.
        if predict_result is None:
            return

        # Compute metrics
        mse = mean_squared_error(y_test, predict_result.prediction)
        r2score = r2_score(y_test, predict_result.prediction)

        # Store the model metrics
        self.metrics["MSE For Parallel Ridge regression SPMD"] = mse
        self.metrics["R2 Score For Parallel Ridge regression SPMD"] = r2score

        return
Пример #5
0
def main():
    """Train a ridge regression model in streaming mode and predict on test data.

    The training CSV is read in chunks of ``chunk_size`` rows; every chunk is
    fed to the streaming training algorithm, which is finalized once no more
    rows can be read. Columns 0-9 are the independent variables and columns
    10-11 the dependent variables.

    Returns:
        A tuple ``(predict_result, ptdata)`` holding the prediction result
        object and the dependent variables read from the test file.

    Raises:
        RuntimeError: if not a single training row could be read.
    """
    infile = "./data/batch/linear_regression_train.csv"
    testfile = "./data/batch/linear_regression_test.csv"

    # Configure a Ridge regression training object for streaming
    train_algo = d4p.ridge_regression_training(interceptFlag=True,
                                               streaming=True)

    chunk_size = 250
    lines_read = 0
    last_dep_data = None  # dependent variables of the last chunk that was fed
    read_error = None     # why reading stopped, if the reader raised
    # read and feed chunk by chunk
    while True:
        # Read data in chunks
        # Let's have 10 independent, and 2 dependent variables (for each observation)
        try:
            indep_data = read_csv(infile, range(10), lines_read, chunk_size)
            dep_data = read_csv(infile, range(10, 12), lines_read, chunk_size)
        except Exception as err:
            # The reader raising marks the end of the input. Catching
            # Exception rather than everything lets KeyboardInterrupt and
            # SystemExit propagate instead of silently ending training.
            read_error = err
            break
        if indep_data.shape[0] == 0:
            # A reader that returns an empty chunk at end of file would
            # otherwise never advance lines_read and loop forever.
            break
        # Now feed chunk
        train_algo.compute(indep_data, dep_data)
        lines_read += indep_data.shape[0]
        last_dep_data = dep_data

    if last_dep_data is None:
        # Fail with the real cause instead of an unrelated error further down
        raise RuntimeError(
            "no training data could be read from %s (%s)" % (infile, read_error))

    # All chunks are done, now finalize the computation
    train_result = train_algo.finalize()

    # Now let's do some prediction
    predict_algo = d4p.ridge_regression_prediction()
    # read test data (with same #features)
    pdata = read_csv(testfile, range(10))
    ptdata = read_csv(testfile, range(10, 12))
    # now predict using the model from the training above
    predict_result = predict_algo.compute(pdata, train_result.model)

    # The prediction result provides prediction
    assert predict_result.prediction.shape == (pdata.shape[0],
                                               last_dep_data.shape[1])

    return (predict_result, ptdata)
Пример #6
0
def test_fit(X, y):
    """Train ridge regression on ``X`` and ``y`` and return the training result.

    The floating-point type is derived from ``X``; the ridge parameter and
    the intercept flag are taken from the module-level ``params`` object.
    """
    fp_type = getFPType(X)
    # The ridge parameter is passed as a 1x1 array
    ridge_parameters = np.array([[params.alpha]])
    trainer = ridge_regression_training(
        fptype=fp_type,
        ridgeParameters=ridge_parameters,
        interceptFlag=params.fit_intercept,
    )
    return trainer.compute(X, y)
Пример #7
0
#    mpirun -genv DIST_CNC=MPI -n 4 python ./ridge_regression_spmd.py

import daal4py as d4p
from numpy import loadtxt, allclose

# Script entry point: train ridge regression in SPMD mode, with every process
# contributing its own training file, then prepare prediction on process 0.
if __name__ == "__main__":

    # Initialize SPMD mode
    d4p.daalinit(spmd=True)

    # Each process gets its own data: the file name suffix is the
    # process id plus one
    infile = "./data/distributed/linear_regression_train_" + str(
        d4p.my_procid() + 1) + ".csv"

    # Configure a distributed Ridge regression training object
    train_algo = d4p.ridge_regression_training(distributed=True)

    # Read data: columns 0-9 are the independent variables and columns 10-11
    # the dependent variables of each observation
    indep_data = loadtxt(infile, delimiter=',', usecols=range(10))
    dep_data = loadtxt(infile, delimiter=',', usecols=range(10, 12))
    # Now train/compute, the result provides the model for prediction
    train_result = train_algo.compute(indep_data, dep_data)

    # Now let's do some prediction
    # This part runs only on process 0
    if d4p.my_procid() == 0:
        predict_algo = d4p.ridge_regression_prediction(distributed=True)
        # Read test data (same feature columns as the training data)
        # NOTE(review): predict_algo and pdata are not used in the lines
        # visible here; the prediction call presumably follows - confirm.
        pdata = loadtxt("./data/distributed/linear_regression_test.csv",
                        delimiter=',',
                        usecols=range(10))
Пример #8
0
            t1 = timeit.default_timer()
            r = func(*args, **keyArgs)
            t2 = timeit.default_timer()
            times.append(t2-t1)
        print (min(times))
        return r
    return st_func


# Problem size comes from the command line: first and second entry of
# args.size
p, n = args.size[0], args.size[1]

# Random inputs, all of shape (p, n): training features, prediction
# features and training targets (generated in this order)
X = rand(p, n)
Xp = rand(p, n)
y = rand(p, n)

# Algorithm objects shared by the timed functions below
regr_train = ridge_regression_training()
regr_predict = ridge_regression_prediction()

@st_time
def test_fit(X, y):
    """Run ridge regression training on ``X`` and ``y``; the result is discarded."""
    regr_train.compute(X, y)

@st_time
def test_predict(X, m):
    """Run ridge regression prediction on ``X`` with model ``m``; the result is discarded."""
    regr_predict.compute(X, m)

# Print the comma-separated record prefix for the training benchmark without
# a newline, so that output printed by the timed call continues the same line
print(
    ','.join([
        args.batchID,
        args.arch,
        args.prefix,
        "Ridge.fit",
        coreString(args.num_threads),
        "Double",
        "%sx%s" % (p, n),
    ]),
    end=',',
)
test_fit(X, y)

# Train once more outside the timed function to obtain a model for prediction
res = regr_train.compute(X, y)

# Same record layout for the prediction benchmark
print(
    ','.join([
        args.batchID,
        args.arch,
        args.prefix,
        "Ridge.prediction",
        coreString(args.num_threads),
        "Double",
        "%sx%s" % (p, n),
    ]),
    end=',',
)
test_predict(Xp, res.model)
Пример #9
0
def test_fit(X, y):
    """Train ridge regression on ``X`` and ``y`` and return the training result.

    The floating-point type is derived from ``X``; the intercept flag is
    taken from the module-level ``params`` object.
    """
    fp_type = getFPType(X)
    trainer = ridge_regression_training(
        fptype=fp_type,
        interceptFlag=params.fit_intercept,
    )
    return trainer.compute(X, y)