def get_daal_prediction(x=np.arange(10).reshape(10,1), y=np.arange(10).reshape(10,1)):
    """Train a DAAL ridge regression on (x, y) and predict back on x.

    The ridge parameter is fixed at 0.0. Before returning, the prediction is
    asserted to be almost equal to ``y``.

    Args:
        x: Array wrapped into a numeric table and used both for training and
            for prediction.
        y: Array of dependent variables used for training and for the final
            comparison.

    Returns:
        The prediction table converted with ``getNumpyArray``.
    """
    features_table = HomogenNumericTable(x)
    responses_table = HomogenNumericTable(y)

    # Training stage
    trainer = ridge_training.Batch()
    trainer.input.set(ridge_training.data, features_table)
    trainer.input.set(ridge_training.dependentVariables, responses_table)

    # The ridge parameter is handed over as a 1x1 numeric table
    ridge_penalty = 0.0
    penalty_table = HomogenNumericTable(np.array([ridge_penalty], ndmin=2))
    trainer.parameter.ridgeParameters = penalty_table

    training_result = trainer.compute()
    fitted_model = training_result.get(ridge_training.model)

    # Prediction stage, on the same inputs the model was trained with
    predictor = ridge_prediction.Batch()
    predictor.input.setModel(ridge_prediction.model, fitted_model)
    predictor.input.setTable(ridge_prediction.data, features_table)
    prediction_result = predictor.compute()
    predicted = getNumpyArray(
        prediction_result.get(ridge_prediction.prediction))

    # The prediction must reproduce the dependent variable it was trained on
    assert_array_almost_equal(y, predicted)
    return predicted
def deserialize(self, serialObjectDict=None, fileName=None, useCompression=False):
    """Rebuild a DAAL object from a serialized byte buffer.

    Exactly one source must be supplied:

    * ``fileName``: the buffer is read with ``np.load(fileName)`` and the
      object description is read from the file with the same base name and
      a ``.txt`` extension.
    * ``serialObjectDict``: a dict providing the buffer under
      ``"Array Object"`` and the description under ``"Object Information"``.

    Args:
        serialObjectDict: Dict holding the buffer and object description.
        fileName: Path of the saved buffer.
        useCompression: When truthy, the buffer is first passed through
            ``MultiSVM.decompress``.

    Returns:
        The object created from the description, after its ``deserialize``
        method has been fed the data archive.

    Raises:
        SystemExit: If neither or both sources are given (a warning is
            issued first).
    """
    if fileName is not None and serialObjectDict is None:
        bufferArray = np.load(fileName)
        # Close the description file deterministically instead of leaking
        # the handle.
        with open(fileName.rsplit(".", 1)[0] + ".txt", "r") as infoFile:
            buffArrObjName = infoFile.read()
    elif (fileName is None and serialObjectDict is not None
          and any(serialObjectDict)):
        bufferArray = serialObjectDict["Array Object"]
        buffArrObjName = serialObjectDict["Object Information"]
    else:
        # Also reached when both arguments are None; previously that case
        # crashed with a TypeError from any(None).
        warnings.warn(
            'Expecting "bufferArray" or "fileName" argument, NOT both')
        raise SystemExit

    if useCompression:
        bufferArray = MultiSVM.decompress(self, bufferArray)
    dataArch = OutputDataArchive(bufferArray)

    # Imported here so the evaluated description can refer to the ``daal``
    # namespace (presumably it holds a constructor expression -- confirm).
    import daal  # noqa: F401

    # SECURITY NOTE(review): eval() executes whatever text was stored next to
    # the buffer. Only load files/dicts from trusted sources, or replace this
    # with an explicit name-to-class lookup.
    try:
        deSerialObj = eval(buffArrObjName)
    except AttributeError:
        deSerialObj = HomogenNumericTable()
    deSerialObj.deserialize(dataArch)
    return deSerialObj
def test_coeff_size(rows=10, columns=9): ''' number of beta coefficients (with intercept flag on) is the same number as size of data sample ''' inout = get_random_array(rows, columns) x = inout[0] y = inout[1] ntX = HomogenNumericTable(x) ntY = HomogenNumericTable(y) ridge_training_algorithm = ridge_training.Batch() ridge_training_algorithm.input.set(ridge_training.data, ntX) ridge_training_algorithm.input.set(ridge_training.dependentVariables, ntY) # set parameter alpha = 1.0 alpha_nt = HomogenNumericTable(np.array([alpha], ndmin=2)) ridge_training_algorithm.parameter.ridgeParameters = alpha_nt result = ridge_training_algorithm.compute() model = result.get(ridge_training.model) beta_coeff = model.getBeta() np_beta = getNumpyArray(beta_coeff) assert y.transpose().shape == np_beta.shape, "Dependent variable size must have\
def test_intercept_flag(rows=10, columns=9):
    """Compare the DAAL ridge intercept with scikit-learn's.

    A ridge model with parameter 1.0 is trained on random data with both
    libraries; element [0, 0] of DAAL's beta table is checked against
    scikit-learn's ``intercept_``.
    """
    samples = get_random_array(rows, columns)
    x = samples[0]
    y = samples[1]
    features_table = HomogenNumericTable(x)
    responses_table = HomogenNumericTable(y)

    trainer = ridge_training.Batch()
    trainer.input.set(ridge_training.data, features_table)
    trainer.input.set(ridge_training.dependentVariables, responses_table)

    # Ridge parameter goes in as a 1x1 numeric table
    ridge_penalty = 1.0
    penalty_table = HomogenNumericTable(np.array([ridge_penalty], ndmin=2))
    trainer.parameter.ridgeParameters = penalty_table

    training_result = trainer.compute()
    fitted_model = training_result.get(ridge_training.model)
    betas = getNumpyArray(fitted_model.getBeta())
    daal_intercept = betas[0, 0]

    # Reference fit with scikit-learn
    reference = ScikitRidgeRegression(alpha=1.0, fit_intercept=True)
    reference.fit(x, y)
    assert_array_almost_equal(reference.intercept_, [daal_intercept])
def testModel(trainingResult):
    """Predict on the test CSV with a trained ridge model and print results.

    Args:
        trainingResult: Training result whose ``training.model`` entry is
            passed to the prediction algorithm.
    """
    # Data source reading the test set from the .csv file
    csvSource = FileDataSource(testDatasetFileName,
                               DataSourceIface.doAllocateNumericTable,
                               DataSourceIface.doDictionaryFromContext)

    # Tables for the features and the ground truth, loaded together through
    # a merged table
    features = HomogenNumericTable(NUM_FEATURES, 0,
                                   NumericTableIface.doNotAllocate)
    groundTruth = HomogenNumericTable(NUM_DEPENDENT_VARS, 0,
                                      NumericTableIface.doNotAllocate)
    merged = MergedNumericTable(features, groundTruth)
    csvSource.loadDataBlock(merged)

    # Prediction algorithm: feed it the test features and the trained model
    predictor = prediction.Batch()
    predictor.input.setTable(prediction.data, features)
    predictor.input.setModel(prediction.model,
                             trainingResult.get(training.model))
    outcome = predictor.compute()

    # Show predictions next to the expected values
    printNumericTable(outcome.get(prediction.prediction),
                      "Ridge Regression prediction results: (first 10 rows):",
                      10)
    printNumericTable(groundTruth, "Ground truth (first 10 rows):", 10)
def execute(linearRegressionModelIndex):
    """Time 1000 training runs of one linear regression variant and report RMSE.

    Reads the module-level ``trainX``, ``trainY``, ``testX`` and ``testY``
    arrays, trains with ``trainModel``, predicts with ``predictResults`` and
    prints the elapsed training time and the test error.

    Args:
        linearRegressionModelIndex: 0 announces the normal-equations variant,
            any other value the QR variant; the value is forwarded unchanged
            to ``trainModel``.
    """
    if linearRegressionModelIndex == 0:
        print('\nExecution of Batch_Float64NormEqDense() function:')
    else:
        print('\nExecution of Batch_Float64QrDense() function:')

    trainDependentVariables = HomogenNumericTable(trainY)
    testDependentVariables = HomogenNumericTable(testY)
    trainDataNumTable = HomogenNumericTable(trainX.copy())
    testDataNumTable = HomogenNumericTable(testX.copy())

    # Only the repeated training is timed; table construction is excluded.
    start = time.time()
    for _ in range(1000):
        trainingResult = trainModel(trainDataNumTable,
                                    trainDependentVariables,
                                    linearRegressionModelIndex)
    model = trainingResult.get(training.model)
    end = time.time()
    print('Performance comparison. Time: %s seconds' % (end - start))

    predictionResult = predictResults(testDataNumTable, model)
    predicted = predictionResult.get(prediction.prediction)
    print('Linear regression. Test error: {:.2f}'.format(
        RMSE(testDependentVariables, predicted)))
def deserializeNumericTable(buffer):
    """Restore a homogeneous numeric table from a serialized buffer.

    Args:
        buffer: Serialized data handed to ``OutputDataArchive``.

    Returns:
        A ``HomogenNumericTable`` populated from the archive.
    """
    archive = OutputDataArchive(buffer)
    # Start from an empty table and let it read itself out of the archive
    restored = HomogenNumericTable()
    restored.deserialize(archive)
    return restored
def getNumericTable(self, **kwargs):
    """Convert ``self.indata`` into a DAAL numeric table.

    The conversion is selected by ``self.informat``:

    * ``'numpy'``: the array is wrapped directly.
    * ``'pandas'``: the frame's underlying array is wrapped.
    * ``'csv'``: ``self.indata`` is passed to a ``FileDataSource`` and the
      loaded table is returned.

    Args:
        **kwargs: Accepted for interface compatibility; not used.

    Returns:
        The numeric table built from the input.

    Raises:
        ValueError: If ``self.informat`` is none of the values above.
    """
    fmt = self.informat
    if fmt == 'numpy':
        return HomogenNumericTable(self.indata)
    if fmt == 'pandas':
        # DataFrame.as_matrix() no longer exists in current pandas; .values
        # returns the same ndarray and is available in old releases too.
        return HomogenNumericTable(self.indata.values)
    if fmt == 'csv':
        # Flag name fixed: "doDictionaryFormContext" was a typo for
        # "doDictionaryFromContext".
        dataSource = FileDataSource(self.indata,
                                    DataSource.doAllocateNumericTable,
                                    DataSource.doDictionaryFromContext)
        dataSource.loadDataBlock()
        return dataSource.getNumericTable()
    raise ValueError("Cannot identify input type.")
def train(self, data, responses, alpha=1.0):
    """Fit a ridge regression model with the float64 default-dense trainer.

    Args:
        data: Training data, passed as the algorithm's ``data`` input.
        responses: Known responses, passed as the dependent variables.
        alpha: Ridge (regularization) parameter; defaults to 1.0.

    Returns:
        The trained model taken from the training result.
    """
    trainer = ridge_training.Batch_Float64DefaultDense()

    # Inputs
    trainer.input.set(ridge_training.data, data)
    trainer.input.set(ridge_training.dependentVariables, responses)

    # The ridge parameter is supplied as a 1x1 numeric table
    penalty_table = HomogenNumericTable(np.array([alpha], ndmin=2))
    trainer.parameter.ridgeParameters = penalty_table

    training_result = trainer.compute()
    return training_result.get(ridge_training.model)
def initializeStep1Local():
    """Run the local step-1 initialization of the implicit ALS model.

    Configures the distributed initializer from module-level settings,
    computes the partial result and stores pieces of it in the globals
    ``itemStep3LocalInput``, ``userOffset`` and ``itemsPartialResultLocal``.

    Returns:
        The tables collection ``init.outputOfStep1ForStep2`` of the partial
        result.
    """
    global itemsPartialResultLocal, itemStep3LocalInput, userOffset, usersPartition

    initializer = init.Distributed(step=step1Local)
    settings = initializer.parameter
    settings.fullNUsers = nUsers
    settings.nFactors = nFactors
    # Offset the seed by this process's rank
    settings.seed += rankId
    settings.partition = HomogenNumericTable(
        np.array(usersPartition, dtype=np.float64))

    # Training data for the initializer
    initializer.input.set(init.data, transposedDataTable)

    localInit = initializer.compute()

    itemStep3LocalInput = localInit.getCollection(
        init.outputOfInitForComputeStep3)
    userOffset = localInit.getCollection(init.offsets, rankId)

    # Wrap the local partial model as the step-4 output consumed by step 1
    localPartialModel = localInit.getPartialModel(init.partialModel)
    itemsPartialResultLocal = training.DistributedPartialResultStep4()
    itemsPartialResultLocal.set(training.outputOfStep4ForStep1,
                                localPartialModel)

    return localInit.getTablesCollection(init.outputOfStep1ForStep2)
def test_svd_daal_vs_sklearn(rows=1000, columns=1000):
    """Check DAAL's singular values against ``np.linalg.svd`` and time both.

    When the ``CHECKPERFORMANCE`` environment variable is set, the test
    additionally requires DAAL to be at least as fast as the reference.
    The timing labels now name the implementations that are actually run:
    the reference is NumPy's SVD (not scikit-learn), and the second timing
    is DAAL's.
    """
    indata = get_random_array(rows, columns)
    daal_input = HomogenNumericTable(indata)
    algorithm = svd.Batch()
    algorithm.input.set(svd.data, daal_input)

    # Reference decomposition
    start_numpy = time.time()
    _U, s, _Vh = np.linalg.svd(indata, full_matrices=False)
    end_numpy = time.time()

    # DAAL decomposition
    start_daal = time.time()
    result = algorithm.compute()
    end_daal = time.time()

    numpy_seconds = end_numpy - start_numpy
    daal_seconds = end_daal - start_daal

    if os.getenv("CHECKPERFORMANCE") is not None:
        assert (daal_seconds <= numpy_seconds)

    # The singular values are reshaped to 1-D using the table's column count
    sigma = getNumpyArray(result.get(svd.singularValues))
    _rows, cols = sigma.shape
    d_sigma = sigma.reshape(cols, )
    assert_array_almost_equal(d_sigma, s)

    print("SVD for matrix[{}][{}]".format(rows, columns))
    print("+ NumPy SVD: {}".format(numpy_seconds))
    print("+ DAAL SVD: {}".format(daal_seconds))
def test_linear_regression_simple():
    """Fit y = x with DAAL's QR linear regression and verify the result.

    The same 3x1 table serves as data and as dependent variable, so the
    expected beta table is [[0, 1]] and the prediction must reproduce the
    input.
    """
    x = np.array([0., 2., 3.]).reshape(3, 1)
    # One table is deliberately used for both inputs
    table_x = table_y = HomogenNumericTable(x)

    # Training
    trainer = linear_training.Batch(method=linear_training.qrDense)
    trainer.input.set(linear_training.data, table_x)
    trainer.input.set(linear_training.dependentVariables, table_y)
    training_result = trainer.compute()
    model = training_result.get(linear_training.model)

    betas = getNumpyArray(model.getBeta())
    expected_betas = np.array([0, 1]).reshape(1, 2)
    assert_almost_equal(expected_betas, betas)

    # Prediction on the training inputs
    predictor = linear_prediction.Batch()
    predictor.input.setModel(linear_prediction.model, model)
    predictor.input.setTable(linear_prediction.data, table_x)
    prediction_result = predictor.compute()
    predicted = getNumpyArray(
        prediction_result.get(linear_prediction.prediction))
    assert_array_almost_equal(x, predicted)
def test_svd_simple():
    """Run DAAL's SVD on a fixed 4x2 matrix and compare with known factors.

    Checks the shapes of the singular values and both singular matrices,
    then their values against hard-coded references.
    """
    indata = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])
    table = HomogenNumericTable(indata)
    _n_rows, n_cols = indata.shape

    solver = svd.Batch(method=svd.defaultDense,
                       leftSingularMatrix=svd.requiredInPackedForm,
                       rightSingularMatrix=svd.requiredInPackedForm)
    solver.input.set(svd.data, table)
    outcome = solver.compute()

    sigma = getNumpyArray(outcome.get(svd.singularValues))
    U = getNumpyArray(outcome.get(svd.leftSingularMatrix))
    V = getNumpyArray(outcome.get(svd.rightSingularMatrix))

    # Shape checks
    assert sigma.shape[1] == n_cols
    assert indata.shape == U.shape
    assert n_cols == V.shape[0] == V.shape[1]

    # Value checks against the reference decomposition
    expected_sigma = np.array([[14.269, 0.6268]])
    expected_U = np.array([[-0.152, -0.823],
                           [-0.350, -0.421],
                           [-0.547, -0.020],
                           [-0.745, 0.381]])
    expected_V = np.array([[-0.641, -0.767],
                           [0.767, -0.641]])
    assert_array_almost_equal(expected_sigma, sigma, decimal=4)
    assert_array_almost_equal(expected_U, U, decimal=3)
    assert_array_almost_equal(expected_V, V, decimal=3)
def train(trainingData, trainingGroundTruth):
    """Train the network from ``configureNet`` with an SGD solver.

    The solver uses batch size 10 and learning rate 0.01; the number of
    iterations is the size of the data's first dimension divided by the
    batch size (truncated to int).

    Args:
        trainingData: Training input; must provide ``getDimensionSize`` and
            ``getDimensions``.
        trainingGroundTruth: Ground truth matching ``trainingData``.

    Returns:
        The result of ``getPredictionModel_Float64()`` on the trained model.
    """
    samplesPerBatch = 10
    stepSize = 0.01

    # SGD solver with a single-value learning rate sequence
    solver = optimization_solver.sgd.Batch(fptype=np.float32)
    rateValues = np.array([[stepSize]], dtype=np.float32)
    solver.parameter.learningRateSequence = HomogenNumericTable(
        rateValues, ntype=np.float32)
    solver.parameter.batchSize = samplesPerBatch
    solver.parameter.nIterations = int(
        trainingData.getDimensionSize(0) / solver.parameter.batchSize)

    topology = configureNet()
    network = training.Batch(solver)

    # The network is initialized for one batch: the first dimension of the
    # sample shape is replaced by the batch size
    batchShape = trainingData.getDimensions()
    batchShape[0] = samplesPerBatch
    network.initialize(batchShape, topology)

    network.input.setInput(training.data, trainingData)
    network.input.setInput(training.groundTruth, trainingGroundTruth)
    outcome = network.compute()
    return outcome.get(training.model).getPredictionModel_Float64()
def train(self, X, y=None):
    """Train a ridge regression model and remember it on the instance.

    :param X: training data, converted through ``IInput.HomogenousDaalData``
    :param y: dependent variables (responses), converted the same way
    :return: the trained model, also stored in ``self.model``
    """
    # Wrap inputs as numeric tables
    data_table = IInput.HomogenousDaalData(X).getNumericTable()
    response_table = IInput.HomogenousDaalData(y).getNumericTable()

    trainer = ridge_training.Batch()
    trainer.input.set(ridge_training.data, data_table)
    trainer.input.set(ridge_training.dependentVariables, response_table)

    # NOTE(review): both branches of this conditional yield True, so the
    # intercept is always enabled regardless of self.parameters -- confirm
    # whether the else-branch was meant to be False.
    trainer.parameter.interceptFlag = True \
        if 'intercept' in self.parameters else True

    # Ridge parameter as a 1x1 numeric table
    penalty_table = HomogenNumericTable(np.array([self.alpha], ndmin=2))
    trainer.parameter.ridgeParameters = penalty_table

    training_result = trainer.compute()
    self.model = training_result.get(ridge_training.model)
    return self.model
def get_daal_prediction(x=np.array([1, 2, 3]), y=np.array([1, 2, 3])):
    """Train a DAAL linear regression on (x, y) and predict back on x.

    Before returning, the prediction is asserted to be almost equal to ``y``.

    Args:
        x: Array wrapped into a numeric table, used for training and
            prediction.
        y: Array of dependent variables used for training and for the final
            comparison.

    Returns:
        The prediction table converted with ``getNumpyArray``.
    """
    features_table = HomogenNumericTable(x)
    responses_table = HomogenNumericTable(y)

    # Training stage
    trainer = linear_training.Batch()
    trainer.input.set(linear_training.data, features_table)
    trainer.input.set(linear_training.dependentVariables, responses_table)
    training_result = trainer.compute()
    fitted_model = training_result.get(linear_training.model)

    # Prediction stage, on the training inputs
    predictor = linear_prediction.Batch()
    predictor.input.setModel(linear_prediction.model, fitted_model)
    predictor.input.setTable(linear_prediction.data, features_table)
    prediction_result = predictor.compute()
    predicted = getNumpyArray(
        prediction_result.get(linear_prediction.prediction))

    # The prediction must reproduce the dependent variable
    assert_array_almost_equal(y, predicted)
    return predicted
def test_zscore_multicolumns():
    """Compare ``z_score`` on a random 10x3 table with ``stats.zscore``."""
    sample = np.random.rand(10, 3)

    # Reference: column-wise z-score with ddof=1
    expected = stats.zscore(sample, axis=0, ddof=1)

    sample_table = HomogenNumericTable(sample)
    scored_table = z_score(sample_table)
    actual = getNumpyArray(scored_table)

    assert_array_almost_equal(expected, actual)
def get_learning_rate(learning_rate):
    """Wrap a learning rate in a 1x1 numeric table for Intel DAAL solvers.

    Args:
        learning_rate (:obj:`float`): Learning rate.

    Returns:
        :py:class:`daal.data_management.HomogenNumericTable`: Allocated 1x1
        table constructed with the learning rate as its value argument.
    """
    n_columns = n_rows = 1
    wrapped = HomogenNumericTable(n_columns, n_rows,
                                  NumericTableIface.doAllocate, learning_rate)
    return wrapped
def getOptimizationSolver(learningRate, batchSize):
    """Construct an SGD optimization solver with the given learning rate.

    Args:
        learningRate: Value stored in the 1x1 learning rate sequence table.
        batchSize: Batch size assigned to the solver's parameter.

    Returns:
        The configured SGD batch solver.
    """
    # 1 x 1 float32 NumericTable holding the learning rate
    learningRateSequence = HomogenNumericTable(1, 1,
                                               NumericTableIface.doAllocate,
                                               learningRate,
                                               ntype=np.float32)

    # The precision of an algorithm is selected with ``fptype``; ``ntype`` is
    # the keyword of numeric tables (see the table above), so passing it here
    # did not request a float32 solver.
    optalg = optimization_solver.sgd.Batch(fptype=np.float32)
    optalg.parameter.learningRateSequence = learningRateSequence
    optalg.parameter.batchSize = batchSize
    return optalg
def trainModel(trainingData, trainingGroundTruth, netLocal, netMaster):
    """Train the neural network batch by batch across nodes.

    For every full batch of ``batchSizeLocal`` samples: the local network
    computes a partial result, the partial results are gathered, the root
    rank feeds them to the master network and extracts the updated weights
    and biases, which are then broadcast and installed into the local model.

    Args:
        trainingData: Training input providing ``getDimensionSize``.
        trainingGroundTruth: Ground truth matching ``trainingData``.
        netLocal: Local (step 1) training algorithm.
        netMaster: Master (step 2) training algorithm.

    Returns:
        On the root rank, the float32 prediction model of the finalized
        training; ``None`` on every other rank.
    """
    # Scratch state handed to the gather helper
    archiveLength = 0
    localBuffer = np.array([], dtype=np.uint8)
    masterBuffer = np.array([], dtype=np.uint8)

    isRoot = rankId == MPI_ROOT
    sampleCount = trainingData.getDimensionSize(0)

    for offset in range(0, sampleCount - batchSizeLocal + 1, batchSizeLocal):
        # Feed the next batch of inputs and expected outputs to the local net
        netLocal.input.setInput(
            training.data,
            getNextSubtensor(trainingData, offset, batchSizeLocal))
        netLocal.input.setInput(
            training.groundTruth,
            getNextSubtensor(trainingGroundTruth, offset, batchSizeLocal))

        # Local derivatives of weights and biases
        partial = netLocal.compute()

        gathered = [0] * nNodes
        gatherPartialResultsFromNodes(partial, gathered, archiveLength,
                                      localBuffer, masterBuffer)

        weightsAndBiases = HomogenNumericTable()
        if isRoot:
            # Hand every node's partial result to the master algorithm
            for node in range(nNodes):
                netMaster.input.add(training.partialResults, node,
                                    gathered[node])

            # Update weights and biases on the master
            partial = netMaster.compute()
            masterModel = partial.get(
                training.resultFromMaster).get(training.model)
            weightsAndBiases = masterModel.getWeightsAndBiases()

        # Distribute the updated weights and biases and install them locally
        received = broadcastWeightsAndBiasesToNodes(weightsAndBiases)
        netLocal.input.getStep1LocalInput(
            training.inputModel).setWeightsAndBiases(received)

    if not isRoot:
        return None

    # Finalize on the master and extract the prediction model
    finalResult = netMaster.finalizeCompute()
    trainedModel = finalResult.get(training.model)
    return trainedModel.getPredictionModel_Float32()
def getNumericTableFromCSV(csvFileName, Rows='All'):
    """Load a CSV file into a ``HomogenNumericTable``.

    Args:
        csvFileName: Path of the CSV file handed to ``FileDataSource``.
        Rows: ``'All'`` to load the whole file, or a non-string value that is
            forwarded to ``loadDataBlock`` as the number of rows to load.

    Returns:
        The numeric table filled by the data source.

    Raises:
        SystemError: If ``Rows`` is a string other than ``'All'`` (a warning
            is issued first).
    """
    loadEverything = isinstance(Rows, str)

    # Validate before touching the file so that a bad argument fails fast
    # and no data source is created for nothing.
    if loadEverything and Rows != 'All':
        warnings.warn('Type error in "Rows" arguments, Can be only int')
        raise SystemError

    dataSource = FileDataSource(csvFileName,
                                DataSourceIface.doAllocateNumericTable,
                                DataSourceIface.doDictionaryFromContext)
    nT = HomogenNumericTable()
    if loadEverything:
        dataSource.loadDataBlock(nT)
    else:
        dataSource.loadDataBlock(Rows, nT)
    return nT
def test_ridge_regression_simple():
    """Fit a ridge regression of x on itself and verify betas and prediction.

    The same 3x1 table is used as data and as dependent variable with ridge
    parameter 1.0. The beta table is compared with hard-coded reference
    values, and the prediction with the input at ``decimal=0`` precision.
    """
    x = np.array([0., 2., 3.]).reshape(3, 1)
    # One table is deliberately used for both inputs
    table_x = table_y = HomogenNumericTable(x)

    # Training
    trainer = ridge_training.Batch()
    trainer.input.set(ridge_training.data, table_x)
    trainer.input.set(ridge_training.dependentVariables, table_y)

    # Ridge parameter as a 1x1 numeric table
    ridge_penalty = 1.0
    penalty_table = HomogenNumericTable(np.array([ridge_penalty], ndmin=2))
    trainer.parameter.ridgeParameters = penalty_table

    training_result = trainer.compute()
    model = training_result.get(ridge_training.model)

    betas = getNumpyArray(model.getBeta())
    expected_betas = np.array([0.294118, 0.823529]).reshape(1, 2)
    assert_array_almost_equal(expected_betas, betas)

    # Prediction on the training inputs
    predictor = ridge_prediction.Batch_Float64DefaultDense()
    predictor.input.setModel(ridge_prediction.model, model)
    predictor.input.setTable(ridge_prediction.data, table_x)
    prediction_result = predictor.compute()
    predicted = getNumpyArray(
        prediction_result.get(ridge_prediction.prediction))
    assert_array_almost_equal(x, predicted, decimal=0)
def test_coeff_size(rows=10, columns=9): ''' number of beta coefficients (with intercept flag on) is the same number as size of data sample ''' inout = get_random_array(rows, columns) test_overfitting(rows, columns) x = inout[0] y = inout[1] ntX = HomogenNumericTable(x) ntY = HomogenNumericTable(y) lr_train = linear_training.Batch() lr_train.input.set(linear_training.data, ntX) lr_train.input.set(linear_training.dependentVariables, ntY) result = lr_train.compute() model = result.get(linear_training.model) beta_coeff = model.getBeta() np_beta = getNumpyArray(beta_coeff) assert y.transpose().shape == np_beta.shape, "Dependent variable size must have\
def _resolveBlockRange(spec, upperBound, argName):
    """Translate a Rows/Columns selector into a (first, last) index pair."""
    if isinstance(spec, str):
        if spec == 'All':
            return 0, upperBound
        warnings.warn(
            'Type error in "%s" arguments, Can be only int/list type'
            % argName)
        raise SystemExit
    if isinstance(spec, (list, tuple)):
        # [first] keeps the table's own upper bound; [first, last] clamps
        # ``last`` to it.
        last = min(spec[1], upperBound) if len(spec) == 2 else upperBound
        return spec[0], last
    # A plain number selects the leading ``spec`` entries (not clamped).
    return 0, spec


def getBlockOfNumericTable(nT, Rows='All', Columns='All'):
    """Extract a rectangular block of a numeric table as a new table.

    Args:
        nT: Source numeric table.
        Rows: ``'All'``, a number ``n`` (rows ``0..n-1``), or a list/tuple
            ``[first]`` / ``[first, last]`` (``last`` exclusive, clamped to
            the number of rows).
        Columns: Same forms as ``Rows``, applied to columns.

    Returns:
        A ``HomogenNumericTable`` built from the selected columns restricted
        to the selected rows.

    Raises:
        SystemExit: If ``Rows`` or ``Columns`` is a string other than
            ``'All'`` (a warning is issued first).
    """
    # Resolve both selectors first so invalid arguments fail before any
    # DAAL work is done.
    firstRow, lastRow = _resolveBlockRange(Rows, nT.getNumberOfRows(), 'Rows')
    nStartDim, nEndDim = _resolveBlockRange(Columns, nT.getNumberOfColumns(),
                                            'Columns')

    from daal.data_management import HomogenNumericTable_Float64, \
        MergedNumericTable, readOnly, BlockDescriptor

    # Read each selected column within the row range and collect the pieces
    # in a merged table.
    mnT = MergedNumericTable()
    for idx in range(nStartDim, nEndDim):
        block = BlockDescriptor()
        nT.getBlockOfColumnValues(idx, firstRow, (lastRow - firstRow),
                                  readOnly, block)
        mnT.addNumericTable(HomogenNumericTable_Float64(block.getArray()))
        nT.releaseBlockOfColumnValues(block)

    # Flatten the merged columns into a single homogeneous table.
    # NOTE(review): this last block is not released, as in the original;
    # whether the returned table still references its memory is not visible
    # here -- confirm before adding a release call.
    block = BlockDescriptor()
    mnT.getBlockOfRows(0, mnT.getNumberOfRows(), readOnly, block)
    mnT = HomogenNumericTable(block.getArray())
    return mnT
def callback(recognizer, audio):
    """Handle one chunk of recognized speech: classify it and log the result.

    The audio is transcribed with Google Speech Recognition. If the text
    contains "bye", a closing record is appended to ``data.txt`` and the
    process exits. Otherwise the text is reduced to stemmed vocabulary words,
    turned into a count vector over ``tokens``, classified with the trained
    SVM, and the outcome is printed, appended to ``pred`` and logged to
    ``data.txt``.

    Args:
        recognizer: Recognizer passed by the caller. Not used: the
            module-level ``r`` is used for recognition instead.
        audio: Audio data to transcribe.
    """
    try:
        text = r.recognize_google(audio)
        print("you said: " + text)

        if "bye" in text:
            # Close the log before exiting, and terminate the record like
            # every other record so the next session starts on a new line.
            with open('data.txt', 'a+') as f:
                f.write("bye" + "," + time.ctime() + "," + "bye" + "\r\n")
            sys.exit()

        # Keep letters only, lower-case, tokenize
        words = word_tokenize(re.sub('[^a-zA-Z]', ' ', text).lower())

        # Vocabulary words that are not stop words, stemmed. The stop-word
        # set is built once, and only if there is something to filter.
        known = [word for word in words if word in tokens]
        stems = []
        if known:
            stop_words = set(stopwords.words('english'))
            ps = PorterStemmer()
            stems = [ps.stem(word) for word in known
                     if word not in stop_words]

        if stems:
            # Single-row count vector over the vocabulary
            iar = np.array([[stems.count(token) for token in tokens]])
            reviewData = HomogenNumericTable(iar)
            pre = daal_svm.predict(trainingResult, reviewData)
            l = getArrayFromNT(pre)
            label = outcome[int(l[0][0])]
            print(label)
            pred.append([text, time.ctime(), label])
            with open('data.txt', 'a+') as f:
                f.write(text + "," + time.ctime() + "," + label + "\r\n")
    except sr.UnknownValueError:
        print("Google Speech Recognition could not understand audio")
    except sr.RequestError as e:
        print("Could not request results from Google Speech Recognition service; {0}".format(e))
def test_intercept_flag(rows=10, columns=9):
    """Compare the DAAL linear regression intercept with scikit-learn's.

    A linear model is trained on random data with both libraries; element
    [0, 0] of DAAL's beta table is checked against scikit-learn's
    ``intercept_``. ``test_overfitting`` is also run with the same
    dimensions.
    """
    inout = get_random_array(rows, columns)
    test_overfitting(rows, columns)
    x = inout[0]
    y = inout[1]
    ntX = HomogenNumericTable(x)
    ntY = HomogenNumericTable(y)

    lr_train = linear_training.Batch()
    lr_train.input.set(linear_training.data, ntX)
    lr_train.input.set(linear_training.dependentVariables, ntY)
    result = lr_train.compute()
    model = result.get(linear_training.model)

    np_beta = getNumpyArray(model.getBeta())
    daal_intercept = np_beta[0, 0]

    # Import from the public package: ``sklearn.linear_model.base`` is a
    # private module that current scikit-learn releases no longer provide.
    from sklearn.linear_model import LinearRegression as ScikitLinearRegression
    regression = ScikitLinearRegression()
    regression.fit(x, y)
    scikit_intercept = regression.intercept_
    assert_array_almost_equal(scikit_intercept, [daal_intercept])
def broadcastWeightsAndBiasesToNodes(wb):
    """Send the root's weights-and-biases table to every rank.

    The root rank serializes ``wb`` into a byte array, the array is
    broadcast through ``comm``, and every rank deserializes it into a fresh
    float32 table.

    Args:
        wb: Weights and biases table; only read on the root rank.

    Returns:
        The deserialized table, or an empty ``HomogenNumericTable`` on the
        root rank when ``wb`` is falsy.
    """
    payload = None

    if rankId == MPI_ROOT:
        # NOTE(review): this return happens before the broadcast below, so
        # the root would skip a call the other ranks still make --
        # presumably a collective operation; confirm this path cannot leave
        # them waiting.
        if not wb:
            return HomogenNumericTable()

        # Serialize the table into a byte array sized to the archive
        archiveOut = InputDataArchive()
        wb.serialize(archiveOut)
        payload = np.zeros(archiveOut.getSizeOfArchive(), dtype=np.uint8)
        archiveOut.copyArchiveToArray(payload)

    # Every rank takes part in the broadcast and receives the root's bytes
    payload = comm.bcast(payload)

    # Rebuild the table locally
    archiveIn = OutputDataArchive(payload)
    received = HomogenNumericTable(ntype=np.float32)
    received.deserialize(archiveIn)
    return received
def computeOutBlocks(nBlocks, dataBlock, dataBlockPartition):
    """Find, for each column block, the rows that have entries in it.

    Args:
        nBlocks: Number of column blocks.
        dataBlock: Table providing ``getNumberOfRows()`` and ``getArrays()``;
            the last two arrays returned are used as one-based column
            indices and one-based row offsets.
        dataBlockPartition: Block boundaries; block ``k`` covers zero-based
            columns in ``[dataBlockPartition[k], dataBlockPartition[k + 1])``.

    Returns:
        A ``KeyValueDataCollection`` mapping each block index to an
        ``np.intc`` numeric table listing the zero-based rows that have at
        least one entry in that block.
    """
    nRows = dataBlock.getNumberOfRows()
    # Flat flags array: entry (block * nRows + row) is 1 when the row has
    # data in the block.
    blockIdFlags = np.zeros(nRows * nBlocks, dtype=np.uint8)
    (_, colIndices, rowOffsets) = dataBlock.getArrays()

    for i in range(nRows):
        for j in range(int(rowOffsets[i] - 1), int(rowOffsets[i + 1] - 1)):
            column = colIndices[j] - 1
            for k in range(nBlocks):
                if dataBlockPartition[k] <= column < dataBlockPartition[k + 1]:
                    blockIdFlags[k * nRows + i] = 1

    # Count flagged rows per block with an array sum. Adding the uint8
    # flags one at a time to a Python int keeps the running total in uint8
    # under NumPy 2 promotion rules, which wraps after 255 rows.
    nNotNull = [int(blockIdFlags[k * nRows:(k + 1) * nRows].sum())
                for k in range(nBlocks)]

    result = KeyValueDataCollection()
    for i in range(nBlocks):
        indicesTable = HomogenNumericTable(1, nNotNull[i],
                                           NumericTableIface.doAllocate,
                                           ntype=np.intc)
        indices = indicesTable.getArray()
        flaggedRows = np.flatnonzero(blockIdFlags[i * nRows:(i + 1) * nRows])
        for indexId, j in enumerate(flaggedRows):
            indices[indexId] = int(j)
        result[i] = indicesTable
    return result
def trainModel():
    """Train the distributed normal-equations model and store the result.

    Each rank processes the training files at indices ``rankId``,
    ``rankId + comm_size``, ... : a step-1 local algorithm computes a partial
    model per file and a step-2 master algorithm combines them. The combined
    partial result is serialized and gathered; the root rank deserializes
    all of them, combines them once more and stores the finalized result in
    the global ``trainingResult``.
    """
    global trainingResult

    nodeMaster = training.Distributed_Step2MasterFloat64NormEqDense()

    # Files assigned to this rank
    for fileName in trainDatasetFileNames[rankId::comm_size]:
        source = FileDataSource(fileName,
                                DataSourceIface.notAllocateNumericTable,
                                DataSourceIface.doDictionaryFromContext)

        # Features and dependent variables are loaded together through a
        # merged table
        features = HomogenNumericTable(nFeatures, 0,
                                       NumericTableIface.notAllocate)
        responses = HomogenNumericTable(nDependentVariables, 0,
                                        NumericTableIface.notAllocate)
        merged = MergedNumericTable(features, responses)
        source.loadDataBlock(merged)

        # Partial model for this file
        localStep = training.Distributed_Step1LocalFloat64NormEqDense()
        localStep.input.set(training.data, features)
        localStep.input.set(training.dependentVariables, responses)
        partial = localStep.compute()
        nodeMaster.input.add(training.partialModels, partial)

        # Release the loaded data before moving on to the next file
        merged.freeDataMemory()
        features.freeDataMemory()
        responses.freeDataMemory()

    # Combine this rank's partial models and serialize the outcome
    partial = nodeMaster.compute()
    outArchive = InputDataArchive()
    partial.serialize(outArchive)
    serializedData = comm.gather(outArchive.getArchiveAsArray())

    if rankId != MPI_ROOT:
        return

    print("Number of processes is %d." % len(serializedData))

    # Root: rebuild every rank's partial result and combine them
    rootMaster = training.Distributed_Step2MasterFloat64NormEqDense()
    for rank in range(comm_size):
        inArchive = OutputDataArchive(serializedData[rank])
        rankPartial = training.PartialResult()
        rankPartial.deserialize(inArchive)
        rootMaster.input.add(training.partialModels, rankPartial)

    rootMaster.compute()
    trainingResult = rootMaster.finalizeCompute()
def initializeNetwork():
    """Load this rank's data and set up the distributed training algorithms.

    Reads the training tensors for ``rankId``, creates an AdaGrad solver,
    builds the local (step 1) and master (step 2) training algorithms,
    initializes the model (through the master algorithm on the root rank,
    directly on the other ranks) and passes it to the local algorithm.

    Returns:
        Tuple ``(trainingData, trainingGroundTruth, netLocal, netMaster)``.
    """
    # Read training data set from a .csv file and create tensors to store
    # input data
    trainingData = readTensorFromCSV(trainDatasetFileNames[rankId])
    trainingGroundTruth = readTensorFromCSV(trainGroundTruthFileNames[rankId],
                                            True)

    # AdaGrad optimization solver. The keyword was misspelled "ntpye", so
    # the float32 request never reached the factory; ``fptype`` is the
    # precision keyword of algorithms.
    solver = optimization_solver.adagrad.Batch(fptype=np.float32)

    # Learning rate for the solver, as a 1x1 table.
    # NOTE(review): the table uses the default element type while the solver
    # is float32 -- confirm whether ``ntype=np.float32`` is wanted here.
    learningRate = 0.001
    solver.parameter.learningRate = HomogenNumericTable(
        1, 1, NumericTableIface.doAllocate, learningRate)
    solver.parameter.batchSize = batchSizeLocal
    solver.parameter.optionalResultRequired = True

    trainingModel = None

    # Algorithms to train the neural network
    netLocal = training.Distributed(step1Local)
    netMaster = training.Distributed(step2Master, solver)

    # The network is sized for one local batch
    sampleSize = trainingData.getDimensions()
    sampleSize[0] = batchSizeLocal

    # Configure the neural network topology
    topology = configureNet()

    if rankId == MPI_ROOT:
        # Set the optimization solver and initialize the network on the
        # master node
        netMaster.parameter.optimizationSolver = solver
        netMaster.initialize(sampleSize, topology)
        trainingModel = netMaster.getResult().get(training.model)
    else:
        # Configure the neural network on local nodes
        trainingModel = training.Model()
        trainingModel.initialize_Float32(sampleSize, topology)

    # Pass the model to the algorithm running on this node
    netLocal.input.setStep1LocalInput(training.inputModel, trainingModel)

    return (trainingData, trainingGroundTruth, netLocal, netMaster)