示例#1
0
 def test_non_contig(self):
     """Check that strided and contiguous inputs yield the same predictions.

     Random training, prediction and response arrays are sliced into views
     that are asserted to be non C-contiguous, and copied into contiguous
     arrays.  A linear regression is trained and evaluated once per
     variant, and the two predictions are compared with ``np.allclose``.
     """
     from numpy.random import rand

     n_rows, n_feats, n_resps = 10007, 1017, 77

     # One extra row and column is allocated so the slices below are
     # strict sub-views of their parent arrays.
     X = rand(n_rows + 1, n_feats + 1)
     Xp = rand(n_rows + 1, n_feats + 1)
     y = rand(n_rows + 1, n_resps + 1)
     strided = (X[:n_rows, :n_feats],
                Xp[:n_rows, :n_feats],
                y[:n_rows, :n_resps])
     dense = tuple(np.ascontiguousarray(view) for view in strided)

     # Sanity-check the fixtures before exercising the algorithms.
     self.assertTrue(
         not any(view.flags['C_CONTIGUOUS'] for view in strided))
     self.assertTrue(all(arr.flags['C_CONTIGUOUS'] for arr in dense))
     self.assertTrue(
         all(np.allclose(arr, view) for arr, view in zip(dense, strided)))

     def fit_and_predict(train_x, predict_x, train_y):
         # Train on (train_x, train_y), then predict for predict_x.
         trained = d4p.linear_regression_training().compute(train_x, train_y)
         return d4p.linear_regression_prediction().compute(
             predict_x, trained.model)

     # Contiguous run first, then the strided one, as two separate fits.
     contiguous_result = fit_and_predict(*dense)
     strided_result = fit_and_predict(*strided)
     self.assertTrue(
         np.allclose(strided_result.prediction, contiguous_result.prediction))
示例#2
0
def main(readcsv=read_csv, method='defaultDense'):
    """Train a batch linear regression and predict on the test file.

    Args:
        readcsv: Loader called as ``readcsv(path, columns)`` with a file
            path and a ``range`` of column indices.
        method: Not used by this function.

    Returns:
        Tuple ``(train_result, predict_result, ptdata)`` where ``ptdata``
        is columns 10-11 of the test file as returned by ``readcsv``.
    """
    train_path = "./data/batch/linear_regression_train.csv"
    test_path = "./data/batch/linear_regression_test.csv"
    # Columns 0-9 are the independent variables, 10-11 the dependent ones.
    feature_cols = range(10)
    response_cols = range(10, 12)

    # Training algorithm with an intercept term.
    trainer = d4p.linear_regression_training(interceptFlag=True)

    indep_data = readcsv(train_path, feature_cols)
    dep_data = readcsv(train_path, response_cols)
    # The training result carries the model used for prediction below.
    train_result = trainer.compute(indep_data, dep_data)

    predictor = d4p.linear_regression_prediction()
    # Test data is read with the same column split as the training data.
    pdata = readcsv(test_path, feature_cols)
    ptdata = readcsv(test_path, response_cols)
    predict_result = predictor.compute(pdata, train_result.model)

    # One prediction row per test row, one column per dependent variable.
    expected_shape = (pdata.shape[0], dep_data.shape[1])
    assert predict_result.prediction.shape == expected_shape

    return (train_result, predict_result, ptdata)
示例#3
0
def run_inference(num_observations: int = 1000):
    """Time daal4py linear regression prediction on a sample of test rows.

    The prediction is repeated ``NUM_LOOPS`` times; each repetition's
    elapsed time is scaled and divided by the number of rows, and the
    collected values are summarised by ``common.calculate_stats``.

    Args:
        num_observations: Passed as ``size`` to
            ``common.get_test_data_df`` when building the test frame.

    Returns:
        The value returned by ``common.calculate_stats`` for the list of
        per-row inference times.

    Raises:
        ZeroDivisionError: If the test frame has no rows while
            ``NUM_LOOPS`` is positive.
    """
    # Load data
    test_df = common.get_test_data_df(X=common.X_df, size=num_observations)
    num_rows = len(test_df)
    print("_______________________________________")
    print("Total Number of Rows", num_rows)

    inference_times = []
    for _ in range(NUM_LOOPS):
        # Algorithm construction stays inside the timed region so the
        # measurement covers the same work as before.
        start_time = timer()
        predict_algo = d4p.linear_regression_prediction(fptype='float')
        # ``train_result`` is not defined in this function; it has to be
        # available at module level.
        predict_algo.compute(test_df, train_result.model)
        end_time = timer()

        total_time = end_time - start_time
        # NOTE(review): 10e6 equals 1e7. If microseconds per row were
        # intended the factor would be 1e6 -- confirm before changing, as
        # other measurements may rely on the same scale.
        inference_times.append(total_time * (10e6) / num_rows)

    return_elem = common.calculate_stats(inference_times)
    print(num_observations, ", ", return_elem)
    return return_elem
示例#4
0
def compute(train_indep_data, train_dep_data, test_indep_data):
    """Fit a linear regression with intercept and predict on the test data.

    Args:
        train_indep_data: Independent variables used for training.
        train_dep_data: Dependent variables used for training.
        test_indep_data: Independent variables to predict for.

    Returns:
        A pair ``(prediction_result, training_result)``.
    """
    trainer = d4p.linear_regression_training(interceptFlag=True)
    training_result = trainer.compute(train_indep_data, train_dep_data)

    # Prediction reuses the model held by the training result.
    predictor = d4p.linear_regression_prediction()
    prediction_result = predictor.compute(test_indep_data,
                                          training_result.model)
    return prediction_result, training_result
示例#5
0
    def linearRegression(self, Data_Path, test_data_path, target, n):
        '''
        daal4py Linear Regression SPMD Mode

        Each process trains on its own CSV file, named by appending
        ``<process id + 1>.csv`` to ``Data_Path``.  Prediction runs on
        process 0 only, so only process 0 records the prediction latency
        and the MSE / R2 metrics; every other process returns after the
        distributed engine has been shut down.

        Args:
            Data_Path: Prefix of the per-process training CSV files.
            test_data_path: Path of the test CSV file.
            target: Column dropped from the features and used as response.
            n: Passed to ``d4p.daalinit`` as ``nthreads``.
        '''

        # Initialize SPMD mode
        d4p.daalinit(nthreads=n)

        # training setup
        train_file = Data_Path + str(d4p.my_procid() + 1) + ".csv"
        data = pd.read_csv(train_file)
        X = data.drop(columns=target)
        y = data[target]

        train_algo = d4p.linear_regression_training(method='qrDense',
                                                    distributed=True)

        self.logger.info('Training the Linear Regression in pydaal SPMD Mode')

        start = time.time()

        train_result = train_algo.compute(X, y)

        self.latency['Parallel_LinearRegression_Pydaal_Time'] = time.time() - \
            start

        # test file setup
        test = pd.read_csv(test_data_path)

        y_test = test[target]
        X_test = test.drop(target, axis=1)

        # Stays None on every process that does not run the prediction.
        predict_result = None
        if d4p.my_procid() == 0:
            predict_algo = d4p.linear_regression_prediction()

            # now predict using the model from the training above
            predict_result = predict_algo.compute(X_test, train_result.model)
            self.latency[
                "Overall Parallel Linear Regression Prediction SPMD Time"] = time.time(
                ) - start

        d4p.daalfini()

        self.logger.info('Completed Linear Regression in pydaal SPMD Mode')

        if predict_result is None:
            # No prediction was made on this process, so there is nothing
            # to score; reading predict_result here used to raise
            # UnboundLocalError.
            return

        # Compute metrics
        mse = mean_squared_error(y_test, predict_result.prediction)
        r2score = r2_score(y_test, predict_result.prediction)

        # Store the time taken and model metrics
        self.metrics['MSE_Parallel_LinearRegression_Pydaal'] = mse
        self.metrics['r2score_Parallel_LinearRegression_Pydaal'] = r2score

        return
示例#6
0
def _daal4py_predict(self, X):
    """Predict with the stored daal4py linear regression model.

    ``X`` is first passed through ``make2d``.  The floating-point type of
    the algorithm is derived from ``self.coef_``.

    Returns:
        The prediction table, flattened with ``np.ravel`` when it has
        exactly one column.
    """
    X = make2d(X)
    predictor = daal4py.linear_regression_prediction(
        fptype=getFPType(self.coef_),
        method='defaultDense')
    prediction = predictor.compute(X, self.daal_model_).prediction
    # A single response column is returned as a 1-D array.
    return np.ravel(prediction) if prediction.shape[1] == 1 else prediction
def _daal4py_predict(self, X):
    """Predict with the stored daal4py model after checking the feature count.

    ``X`` is first passed through ``make2d``.

    Returns:
        The prediction table; flattened with ``np.ravel`` when it has one
        column and ``self.coef_`` is one-dimensional.

    Raises:
        ValueError: If the number of columns of ``X`` differs from
            ``self.n_features_in_``.
    """
    X = make2d(X)
    predictor = daal4py.linear_regression_prediction(
        fptype=getFPType(self.coef_),
        method='defaultDense')

    # Reject inputs whose width does not match the fitted model.
    if X.shape[1] != self.n_features_in_:
        raise ValueError(
            f'X has {X.shape[1]} features, '
            f'but LinearRegression is expecting '
            f'{self.n_features_in_} features as input')

    prediction = predictor.compute(X, self.daal_model_).prediction
    if prediction.shape[1] == 1 and self.coef_.ndim == 1:
        return np.ravel(prediction)
    return prediction
示例#8
0
def _daal4py_predict(self, X):
    """Predict with the stored daal4py linear regression model.

    ``X`` is first passed through ``make2d``.

    Returns:
        The prediction table; flattened with ``np.ravel`` when it has one
        column and ``self.coef_`` is one-dimensional.

    Raises:
        ValueError: If the prediction algorithm raises ``RuntimeError``;
            the original error is attached as ``__cause__``.
    """
    X = make2d(X)
    _fptype = getFPType(self.coef_)
    lr_pred = daal4py.linear_regression_prediction(
        fptype=_fptype,
        method='defaultDense'
    )
    try:
        lr_res = lr_pred.compute(X, self.daal_model_)
    except RuntimeError as err:
        # Chain explicitly so the underlying daal4py error stays visible
        # as the cause instead of an incidental context.
        raise ValueError(
            'Input data shape {} is inconsistent with the trained model'.format(X.shape)
        ) from err

    res = lr_res.prediction
    if res.shape[1] == 1 and self.coef_.ndim == 1:
        res = np.ravel(res)

    return res
示例#9
0
def _daal4py_predict(self, X):
    """Predict with the stored daal4py linear regression model.

    ``X`` is first passed through ``make2d``.

    Returns:
        The prediction table; flattened with ``np.ravel`` when it has one
        column and ``self.coef_`` is one-dimensional.

    Raises:
        ValueError: If ``sklearn_check_version('0.23')`` is true and the
            number of columns of ``X`` differs from
            ``self.n_features_in_``, or if the prediction algorithm
            raises ``RuntimeError`` (attached as ``__cause__``).
    """
    X = make2d(X)
    _fptype = getFPType(self.coef_)
    lr_pred = daal4py.linear_regression_prediction(fptype=_fptype,
                                                   method='defaultDense')
    # The feature-count check is applied only when the version check passes.
    if sklearn_check_version('0.23') and X.shape[1] != self.n_features_in_:
        raise ValueError(f'X has {X.shape[1]} features, '
                         f'but LinearRegression is expecting '
                         f'{self.n_features_in_} features as input')
    try:
        lr_res = lr_pred.compute(X, self.daal_model_)
    except RuntimeError as err:
        # Chain explicitly so the underlying daal4py error stays visible
        # as the cause instead of an incidental context.
        raise ValueError(
            f'Input data shape {X.shape} is inconsistent with the trained model'
        ) from err
    res = lr_res.prediction
    if res.shape[1] == 1 and self.coef_.ndim == 1:
        res = np.ravel(res)

    return res
def main(readcsv=read_csv, method='defaultDense'):
    """Train a linear regression in streaming mode and predict on the test file.

    The training file is fed to the algorithm in chunks of 250 rows until
    the reader raises or returns a chunk without rows.

    Args:
        readcsv: Loader called as ``readcsv(path, columns)`` and
            ``readcsv(path, columns, lines_read, chunk_size)``; presumably
            the last two are the rows to skip and the rows to read --
            verify against the loader in use.
        method: Not used by this function.

    Returns:
        Tuple ``(train_result, predict_result, ptdata)`` where ``ptdata``
        is columns 10-11 of the test file as returned by ``readcsv``.

    Raises:
        RuntimeError: If not a single chunk of training data could be read.
    """
    infile = "./data/batch/linear_regression_train.csv"
    testfile = "./data/batch/linear_regression_test.csv"

    # Configure a Linear regression training object for streaming
    train_algo = d4p.linear_regression_training(interceptFlag=True,
                                                streaming=True)

    chunk_size = 250
    lines_read = 0
    dep_data = None
    read_error = None
    # read and feed chunk by chunk
    while True:
        # Read data in chunks
        # Let's have 10 independent, and 2 dependent variables (for each observation)
        try:
            indep_chunk = readcsv(infile, range(10), lines_read, chunk_size)
            dep_chunk = readcsv(infile, range(10, 12), lines_read, chunk_size)
        except Exception as err:
            # A failing read ends the stream.  Only ``Exception`` is
            # caught so KeyboardInterrupt / SystemExit still propagate.
            read_error = err
            break
        if indep_chunk.shape[0] == 0:
            # A chunk without rows would never advance ``lines_read``
            # and the loop would not terminate.
            break
        # Now feed chunk
        train_algo.compute(indep_chunk, dep_chunk)
        dep_data = dep_chunk
        lines_read += indep_chunk.shape[0]

    if dep_data is None:
        # Nothing was fed to the algorithm; surface the read failure
        # instead of failing later on an unbound name.
        raise RuntimeError(
            "No training data could be read from {}".format(infile)
        ) from read_error

    # All chunks are done, now finalize the computation
    train_result = train_algo.finalize()

    # Now let's do some prediction
    predict_algo = d4p.linear_regression_prediction()
    # read test data (with same #features)
    pdata = readcsv(testfile, range(10))
    ptdata = readcsv(testfile, range(10, 12))
    # now predict using the model from the training above
    predict_result = predict_algo.compute(pdata, train_result.model)

    # The prediction result provides prediction
    assert predict_result.prediction.shape == (pdata.shape[0],
                                               dep_data.shape[1])

    return (train_result, predict_result, ptdata)
示例#11
0
    def linearRegression(self, X_train, X_test, y_train, y_test, target):
        '''
        Train and score a daal4py linear regression in batch mode.

        Training latency and overall (training plus prediction) latency
        are stored in ``self.latency``; MSE and R2 on the test data are
        stored in ``self.metrics``.  ``target`` is not used.
        '''
        trainer = d4p.linear_regression_training(method='qrDense')

        self.logger.info(
            'Training the Linear Regression in pydaal Batch/Serial Mode')
        t0 = time.time()
        trained = trainer.compute(X_train, y_train)
        self.latency["Serial Linear Regression Batch Time"] = time.time() - t0

        # Predict with the model held by the training result.
        predictor = d4p.linear_regression_prediction()
        predictions = predictor.compute(X_test, trained.model).prediction
        self.latency[
            'Overall Serial Linear Regression Prediction Batch Time'
        ] = time.time() - t0
        self.logger.info(
            'Completed Linear Regression in pydaal Batch/Serial Mode')

        # Both scores are computed before either one is stored.
        mse_value = mean_squared_error(y_test, predictions)
        r2_value = r2_score(y_test, predictions)
        self.metrics['MSE_serial_linear_regression_pydaal'] = mse_value
        self.metrics['r2_score_serial_linear_regression_pydaal'] = r2_value
示例#12
0
# saving model to a file
# The context manager closes (and therefore flushes) the file before it is
# read back below.
with open(model_filename, "wb") as model_file:
    pickle.dump(model, model_file)

# Now let's **load up the model** and look at one of the model's features.

# In[6]:

# loading the training model from a file
# NOTE: unpickling can execute arbitrary code; only load files you trust.
with open(model_filename, "rb") as model_file:
    loaded_model = pickle.load(model_file)
print("Here is one of our loaded model's features: \n\n", loaded_model.Beta)

# ## Making a Prediction and Saving the Results

# Time to **make a prediction!**

# In[7]:

# now predicting the target feature(s) using the trained model
y_pred = d4p.linear_regression_prediction().compute(X_test,
                                                    loaded_model).prediction

# Now let's **export the results to a CSV file**.

# In[8]:

np.savetxt("./results/linear_regression_batch_results.csv",
           y_pred,
           delimiter=",")
print("[CODE_SAMPLE_COMPLETED_SUCCESFULLY]")
示例#13
0
            t1 = timeit.default_timer()
            r = func(*args, **keyArgs)
            t2 = timeit.default_timer()
            times.append(t2-t1)
        print(min(times))
        return r
    return st_func

# Problem size taken from the parsed command-line arguments.
p, n = args.size[0], args.size[1]
X = rand(p, n)
Xp = rand(p, n)
y = rand(p, n)

# One training and one prediction algorithm, shared by the timed functions.
regr_train = linear_regression_training()
regr_predict = linear_regression_prediction()


@st_time
def test_fit(X, y):
    """Run training on ``(X, y)`` under the ``st_time`` decorator."""
    regr_train.compute(X, y)


@st_time
def test_predict(X, m):
    """Run prediction on ``X`` with model ``m`` under the ``st_time`` decorator."""
    regr_predict.compute(X, m)


size_label = "{}x{}".format(p, n)

# Each result line starts with a comma-terminated description; the timed
# call that follows is expected to print the rest of the line.
print(
    ','.join([
        args.batchID, args.arch, args.prefix, "Linear.fit",
        coreString(args.num_threads), "Double", size_label,
    ]),
    end=',')
test_fit(X, y)

# Train once more outside the timer to obtain a model for prediction.
res = regr_train.compute(X, y)
print(
    ','.join([
        args.batchID, args.arch, args.prefix, "Linear.prediction",
        coreString(args.num_threads), "Double", size_label,
    ]),
    end=',')
test_predict(Xp, res.model)
示例#14
0
def test_predict(Xp, model):
    """Run linear regression prediction on ``Xp`` with ``model``.

    The floating-point type of the algorithm is chosen by ``getFPType(Xp)``.

    Returns:
        The result object of the prediction algorithm's ``compute`` call.
    """
    fptype = getFPType(Xp)
    predictor = linear_regression_prediction(fptype=fptype)
    result = predictor.compute(Xp, model)
    return result
示例#15
0
    infile = "./data/distributed/linear_regression_train_" + str(
        d4p.my_procid() + 1) + ".csv"

    # Configure a Linear regression training object
    train_algo = d4p.linear_regression_training(distributed=True)

    # Read data. Let's have 10 independent, and 2 dependent variables (for each observation)
    indep_data = loadtxt(infile, delimiter=',', usecols=range(10))
    dep_data = loadtxt(infile, delimiter=',', usecols=range(10, 12))
    # Now train/compute, the result provides the model for prediction
    train_result = train_algo.compute(indep_data, dep_data)

    # Now let's do some prediction
    # It runs only on a single node
    if d4p.my_procid() == 0:
        predict_algo = d4p.linear_regression_prediction()
        # read test data (with same #features)
        pdata = loadtxt("./data/distributed/linear_regression_test.csv",
                        delimiter=',',
                        usecols=range(10))
        # now predict using the model from the training above
        predict_result = d4p.linear_regression_prediction().compute(
            pdata, train_result.model)

        # The prediction result provides prediction
        assert predict_result.prediction.shape == (pdata.shape[0],
                                                   dep_data.shape[1])

    print('All looks good!')
    d4p.daalfini()
示例#16
0
# loading the training model from a file
# The context manager closes the file handle once the model is loaded.
with open(model_filename, "rb") as model_file:
    loaded_model = joblib.load(model_file)
print("Here is one of our loaded model's features: \n\n", loaded_model.Beta)

# ## Making a Prediction and Saving the Results

# Time to **make a prediction!**

# In[19]:

# read test data
test_data = pd.read_csv("./data/linear_regression_test.csv").drop(["target"],
                                                                  axis=1)

# now predict using the model from the training above
# NOTE(review): this uses the in-memory train_result.model, not the
# loaded_model read back from disk above -- confirm that is intended.
predict_result = d4p.linear_regression_prediction().compute(
    test_data, train_result.model).prediction

# Now let's **export the results to a CSV file**. We will also **stop the distribution engine.**

# In[20]:

# now export the results to a CSV file
results_filename = "./results/daal4py_Distributed_LinearRegression_results" + str(
    d4p.my_procid() + 1) + ".csv"
np.savetxt(results_filename, predict_result, delimiter=",")

d4p.daalfini()  # stops the distribution engine
print("[CODE_SAMPLE_COMPLETED_SUCCESFULLY]")

# In[ ]:
示例#17
0
def lr_predict(N, D, model):
    """Run linear regression prediction on random data.

    Args:
        N: Row count before halving; ``N // 2`` rows are generated.
        D: Number of columns of the generated data.
        model: Model passed to the prediction algorithm.

    Returns:
        The result object of the prediction algorithm's ``compute`` call.
    """
    # Floor division keeps the dimension an integer; ``N / 2`` is a float
    # under Python 3, which NumPy rejects as an array dimension.
    data = np.random.ranf((N // 2, D))
    return daal4py.linear_regression_prediction().compute(data, model)