def main():
    """Train a ridge regression model in batch mode and predict on test data.

    Both CSV files are read with the first 10 columns as independent
    variables and columns 10-11 as dependent variables.

    Returns:
        tuple: ``(predict_result, ptdata)`` - the result object of the
        prediction algorithm and the dependent-variable columns read from
        the test file.
    """
    train_path = "./data/batch/linear_regression_train.csv"
    test_path = "./data/batch/linear_regression_test.csv"

    # Training algorithm object (created before any data is read).
    trainer = d4p.ridge_regression_training(interceptFlag=True)

    # Training data: features first, then the two response columns.
    train_features = read_csv(train_path, range(10))
    train_targets = read_csv(train_path, range(10, 12))

    # Fitting yields a result whose ``model`` is consumed by prediction.
    fitted = trainer.compute(train_features, train_targets)

    predictor = d4p.ridge_regression_prediction()

    # Test data uses the same column layout as the training data.
    test_features = read_csv(test_path, range(10))
    test_targets = read_csv(test_path, range(10, 12))

    predicted = predictor.compute(test_features, fitted.model)

    # Sanity check: one prediction row per test row, one column per target.
    expected_shape = (test_features.shape[0], train_targets.shape[1])
    assert predicted.prediction.shape == expected_shape

    return (predicted, test_targets)
def ridgeRegression(self, X_train, X_test, y_train, y_test, target):
    '''
    Ridge Regression in batch/serial mode.

    Trains on (X_train, y_train), predicts on X_test and records:
      * the training wall-clock time in ``self.latency``
      * MSE and R2 score against y_test in ``self.metrics``

    ``target`` is accepted but not used by this method.
    Returns None.
    '''
    trainer = d4p.ridge_regression_training(interceptFlag=True)
    self.logger.info('Training the Ridge Regression in pydaal Batch/Serial Mode')

    # Only the training call is timed.
    t_begin = time.time()
    fitted = trainer.compute(X_train, y_train)
    elapsed = time.time() - t_begin
    self.latency["Serial Ridge Regression Batch Time"] = elapsed

    # Predict on the held-out data using the freshly trained model.
    predictor = d4p.ridge_regression_prediction()
    y_hat = predictor.compute(X_test, fitted.model).prediction
    self.logger.info('Completed Ridge Regression in pydaal Batch/Serial Mode')

    # Compute both metrics before storing either one.
    mse_value = mean_squared_error(y_test, y_hat)
    r2_value = r2_score(y_test, y_hat)

    self.metrics["MSE For Serial Ridge regression Batch"] = mse_value
    self.metrics["R2 Score For Serial Ridge regression Batch"] = r2_value
    return None
def _daal4py_predict(self, X):
    """Predict with the stored daal4py ridge regression model.

    Args:
        X: Input samples; passed through ``make2d`` before prediction.

    Returns:
        The ``prediction`` array of the daal4py result. It is flattened to
        1-D only when it has a single column *and* ``self.coef_`` is 1-D;
        when ``coef_`` is 2-D the 2-D prediction is returned unchanged so
        the output dimensionality follows that of the coefficients.
    """
    X = make2d(X)
    _fptype = getFPType(self.coef_)
    ridge_palg = daal4py.ridge_regression_prediction(
        fptype=_fptype, method='defaultDense')
    ridge_res = ridge_palg.compute(X, self.daal_model_)

    res = ridge_res.prediction
    # Ravel only for a 1-D coef_. Flattening unconditionally would turn a
    # single-column 2-D prediction into 1-D even when coef_ is 2-D.
    if res.shape[1] == 1 and np.ndim(self.coef_) == 1:
        res = np.ravel(res)
    return res
def ridgeRegression(self, Data_Path, test_data_path, target, n):
    '''
    daal4py Ridge Regression SPMD Mode.

    Every process trains on its own CSV shard, named
    ``Data_Path + <1-based process id> + ".csv"``. The training time is
    stored in ``self.latency`` on every process. Prediction and the MSE /
    R2 metrics (stored in ``self.metrics``) are produced on process 0 only.

    Parameters:
        Data_Path: path prefix of the per-process training CSV files.
        test_data_path: path of the test CSV file.
        target: name of the target column in both CSV files.
        n: value passed to ``d4p.daalinit`` as ``nthreads``.

    Returns None.
    '''
    # Initialize SPMD mode
    d4p.daalinit(nthreads=n)
    rank = d4p.my_procid()

    # training data: one shard per process
    train_file = Data_Path + str(rank + 1) + ".csv"
    data = pd.read_csv(train_file)
    X = data.drop(columns=target)
    y = data[target]

    # test file setup
    test = pd.read_csv(test_data_path)
    y_test = test[target]
    X_test = test.drop(target, axis=1)

    # Configure a Ridge regression training object
    train_algo = d4p.ridge_regression_training(distributed=True,
                                               interceptFlag=True)
    self.logger.info('Training the Ridge Regression in pydaal SPMD Mode')

    start_time = time.time()
    train_result = train_algo.compute(X, y)
    self.latency["Parallel Ridge Regression SPMD Time"] = time.time() - \
        start_time

    # Only process #0 predicts; on every other process there is no
    # prediction result to evaluate.
    predict_result = None
    if rank == 0:
        predict_algo = d4p.ridge_regression_prediction()
        # now predict using the model from the training above
        predict_result = predict_algo.compute(X_test, train_result.model)
        self.logger.info('Completed Ridge Regression in pydaal SPMD Mode')

    # Every process must leave SPMD mode, not just process #0.
    d4p.daalfini()

    if predict_result is None:
        # Nothing to score here; avoids referencing an unbound result.
        return

    # Compute metrics
    mse = mean_squared_error(y_test, predict_result.prediction)
    r2score = r2_score(y_test, predict_result.prediction)

    # Store the model metrics
    self.metrics["MSE For Parallel Ridge regression SPMD"] = mse
    self.metrics["R2 Score For Parallel Ridge regression SPMD"] = r2score
    return
def _daal4py_predict(self, X):
    """Run daal4py ridge regression prediction on ``X``.

    Args:
        X: Input samples; passed through ``make2d`` first.

    Returns:
        The ``prediction`` array of the daal4py result, flattened to 1-D
        when it has exactly one column and ``self.coef_`` is 1-D.

    Raises:
        ValueError: if the number of columns of ``X`` differs from
            ``self.n_features_in_``.
    """
    X = make2d(X)
    predictor = daal4py.ridge_regression_prediction(
        fptype=getFPType(self.coef_),
        method='defaultDense',
    )

    # Reject inputs whose width differs from what the model was fitted on.
    if self.n_features_in_ != X.shape[1]:
        message = (
            f'X has {X.shape[1]} features, '
            f'but Ridge is expecting {self.n_features_in_} features as input'
        )
        raise ValueError(message)

    prediction = predictor.compute(X, self.daal_model_).prediction

    # A single-column result is flattened only for a 1-D coef_.
    single_column = prediction.shape[1] == 1
    if single_column and self.coef_.ndim == 1:
        return np.ravel(prediction)
    return prediction
def main():
    """Train a ridge regression model in streaming mode and predict.

    The training CSV is fed to the algorithm in chunks of ``chunk_size``
    rows (first 10 columns independent, columns 10-11 dependent). After the
    last chunk the computation is finalized and the model is applied to the
    test file.

    Returns:
        tuple: ``(predict_result, ptdata)`` - the prediction result object
        and the dependent-variable columns read from the test file.

    Raises:
        RuntimeError: if not a single chunk of training data could be read.
    """
    infile = "./data/batch/linear_regression_train.csv"
    testfile = "./data/batch/linear_regression_test.csv"

    # Configure a Ridge regression training object for streaming
    train_algo = d4p.ridge_regression_training(interceptFlag=True,
                                               streaming=True)

    chunk_size = 250
    lines_read = 0
    n_dependent = None  # number of dependent columns seen in fed chunks

    # read and feed chunk by chunk
    while True:
        # The loop treats a failed read as "no more data". Catch Exception
        # rather than everything so Ctrl-C / SystemExit still propagate.
        try:
            indep_data = read_csv(infile, range(10), lines_read, chunk_size)
            dep_data = read_csv(infile, range(10, 12), lines_read, chunk_size)
        except Exception:
            break

        # An empty chunk also means end of input; without this check
        # lines_read would never advance and the loop would not terminate.
        if indep_data.shape[0] == 0:
            break

        # Now feed chunk
        train_algo.compute(indep_data, dep_data)
        lines_read += indep_data.shape[0]
        n_dependent = dep_data.shape[1]

    if n_dependent is None:
        raise RuntimeError(
            "no training data could be read from " + infile)

    # All chunks are done, now finalize the computation
    train_result = train_algo.finalize()

    # Now let's do some prediction
    predict_algo = d4p.ridge_regression_prediction()
    # read test data (with same #features)
    pdata = read_csv(testfile, range(10))
    ptdata = read_csv(testfile, range(10, 12))
    # now predict using the model from the training above
    predict_result = predict_algo.compute(pdata, train_result.model)

    # One prediction row per test row, one column per dependent variable
    assert predict_result.prediction.shape == (pdata.shape[0], n_dependent)

    return (predict_result, ptdata)
def test_predict(Xp, model):
    """Apply a ridge regression model to ``Xp`` and return the result.

    The prediction algorithm's ``fptype`` is taken from ``getFPType(Xp)``.
    The value returned is whatever the algorithm's ``compute`` returns.
    """
    fptype = getFPType(Xp)
    predictor = ridge_regression_prediction(fptype=fptype)
    outcome = predictor.compute(Xp, model)
    return outcome
# Each process reads its own training file, selected by its 1-based id.
infile = "./data/distributed/linear_regression_train_" + str(
    d4p.my_procid() + 1) + ".csv"

# Configure a Ridge regression training object
train_algo = d4p.ridge_regression_training(distributed=True)

# Read data. Let's have 10 independent,
# and 2 dependent variables (for each observation)
indep_data = loadtxt(infile, delimiter=',', usecols=range(10))
dep_data = loadtxt(infile, delimiter=',', usecols=range(10, 12))

# Now train/compute, the result provides the model for prediction
train_result = train_algo.compute(indep_data, dep_data)

# Now let's do some prediction
# It runs only on a single process
if d4p.my_procid() == 0:
    # One default-configured prediction object is created and used. The
    # previous code also built a second, never-used object with
    # distributed=True.
    predict_algo = d4p.ridge_regression_prediction()

    # read test data (with same #features)
    pdata = loadtxt("./data/distributed/linear_regression_test.csv",
                    delimiter=',', usecols=range(10))

    # now predict using the model from the training above
    predict_result = predict_algo.compute(pdata, train_result.model)

    # The prediction result provides prediction
    assert predict_result.prediction.shape == (pdata.shape[0],
                                               dep_data.shape[1])

print('All looks good!')
d4p.daalfini()
r = func(*args, **keyArgs) t2 = timeit.default_timer() times.append(t2-t1) print (min(times)) return r return st_func p = args.size[0] n = args.size[1] X = rand(p,n) Xp = rand(p,n) y = rand(p,n) regr_train = ridge_regression_training() regr_predict = ridge_regression_prediction() @st_time def test_fit(X,y): regr_train.compute(X, y) @st_time def test_predict(X, m): regr_predict.compute(X, m) print (','.join([args.batchID, args.arch, args.prefix, "Ridge.fit", coreString(args.num_threads), "Double", "%sx%s" % (p,n)]), end=',') test_fit(X, y) res = regr_train.compute(X, y) print (','.join([args.batchID, args.arch, args.prefix, "Ridge.prediction", coreString(args.num_threads), "Double", "%sx%s" % (p,n)]), end=',') test_predict(Xp, res.model)