Python HomogenNumericTable示例，daal.data_management.HomogenNumericTable Python示例

示例#1

0

显示文件

文件： test_daal_ridge_regression.py 项目： yinxx/h2o4gpu

    def get_daal_prediction(x=np.arange(10).reshape(10,1), y=np.arange(10).reshape(10,1)):
        """Fit DAAL ridge regression on (x, y) and return its predictions for x.

        The ridge penalty is 0.0; the predictions are asserted to match ``y``
        (via ``assert_array_almost_equal``) before they are returned.
        """
        features_nt = HomogenNumericTable(x)
        responses_nt = HomogenNumericTable(y)

        # Zero penalty wrapped in a 1x1 numeric table
        penalty_nt = HomogenNumericTable(np.array([0.0], ndmin=2))

        trainer = ridge_training.Batch()
        trainer.input.set(ridge_training.data, features_nt)
        trainer.input.set(ridge_training.dependentVariables, responses_nt)
        trainer.parameter.ridgeParameters = penalty_nt
        training_result = trainer.compute()
        fitted_model = training_result.get(ridge_training.model)

        predictor = ridge_prediction.Batch()
        predictor.input.setModel(ridge_prediction.model, fitted_model)
        predictor.input.setTable(ridge_prediction.data, features_nt)
        prediction_result = predictor.compute()

        np_predicted = getNumpyArray(
            prediction_result.get(ridge_prediction.prediction))
        # The fit must reproduce the dependent variable it was trained on
        assert_array_almost_equal(y, np_predicted)
        return np_predicted

示例#2

0

显示文件

文件： SVM.py 项目： jadidaniel/Prediction-of-epidemic-disease-dynamics-using-Machine-learning-model

 def deserialize(self,
                 serialObjectDict=None,
                 fileName=None,
                 useCompression=False):
     """Rebuild a DAAL object from a serialized byte buffer.

     Exactly one source must be supplied: ``fileName`` (a NumPy file holding
     the buffer, with a sibling ``.txt`` file holding the constructor
     expression) or ``serialObjectDict`` (a dict with the keys
     ``"Array Object"`` and ``"Object Information"``).

     Args:
         serialObjectDict: Dict carrying the buffer and the constructor
             expression.
         fileName: Path of the NumPy file containing the buffer.
         useCompression: When true, the buffer is first passed through
             ``MultiSVM.decompress``.

     Returns:
         The object built from the constructor expression (or an empty
         ``HomogenNumericTable`` when evaluating it raises
         ``AttributeError``) after ``deserialize`` has been called on it.

     Raises:
         SystemExit: If neither source, or both sources, are supplied.
     """
     if fileName is not None and serialObjectDict is None:
         bufferArray = np.load(fileName)
         # Context manager so the descriptor file is closed promptly.
         with open(fileName.rsplit(".", 1)[0] + ".txt", "r") as infoFile:
             buffArrObjName = infoFile.read()
     elif fileName is None and serialObjectDict:
         # Plain truthiness also covers None, which any() cannot iterate.
         bufferArray = serialObjectDict["Array Object"]
         buffArrObjName = serialObjectDict["Object Information"]
     else:
         warnings.warn(
             'Expecting "bufferArray" or "fileName" argument, NOT both')
         raise SystemExit
     if useCompression:
         bufferArray = MultiSVM.decompress(self, bufferArray)
     dataArch = OutputDataArchive(bufferArray)
     # Imported only once it is needed; presumably the stored expression
     # refers to names under the ``daal`` package.
     import daal  # noqa: F401
     # SECURITY NOTE: the stored expression is executed with eval(); only
     # deserialize data that comes from a trusted source.
     try:
         deSerialObj = eval(buffArrObjName)
     except AttributeError:
         deSerialObj = HomogenNumericTable()
     deSerialObj.deserialize(dataArch)
     return deSerialObj

示例#3

0

显示文件

文件： test_daal_ridge_regression.py 项目： yinxx/h2o4gpu

    def test_coeff_size(rows=10, columns=9):
        '''
        number of beta coefficients (with intercept flag on)
        is the same number as size of data sample
        '''
        inout = get_random_array(rows, columns)
        x = inout[0]
        y = inout[1]

        ntX = HomogenNumericTable(x)
        ntY = HomogenNumericTable(y)

        ridge_training_algorithm = ridge_training.Batch()
        ridge_training_algorithm.input.set(ridge_training.data, ntX)
        ridge_training_algorithm.input.set(ridge_training.dependentVariables, ntY)

        # set parameter
        alpha = 1.0
        alpha_nt = HomogenNumericTable(np.array([alpha], ndmin=2))
        ridge_training_algorithm.parameter.ridgeParameters = alpha_nt

        result = ridge_training_algorithm.compute()
        model = result.get(ridge_training.model)
        beta_coeff = model.getBeta()
        np_beta = getNumpyArray(beta_coeff)

        assert y.transpose().shape == np_beta.shape, "Dependent variable size must have\

示例#4

0

显示文件

文件： test_daal_ridge_regression.py 项目： yinxx/h2o4gpu

    def test_intercept_flag(rows=10, columns=9):
        """Check that DAAL's ridge intercept agrees with scikit-learn's.

        Both models are fitted on the same random sample with a penalty of
        1.0; element [0, 0] of DAAL's beta table is compared against
        ``intercept_`` of the scikit-learn estimator.
        """
        sample = get_random_array(rows, columns)
        x, y = sample[0], sample[1]

        features_nt = HomogenNumericTable(x)
        responses_nt = HomogenNumericTable(y)
        penalty_nt = HomogenNumericTable(np.array([1.0], ndmin=2))

        trainer = ridge_training.Batch()
        trainer.input.set(ridge_training.data, features_nt)
        trainer.input.set(ridge_training.dependentVariables, responses_nt)
        trainer.parameter.ridgeParameters = penalty_nt
        training_result = trainer.compute()

        fitted_model = training_result.get(ridge_training.model)
        beta_table = getNumpyArray(fitted_model.getBeta())
        daal_intercept = beta_table[0, 0]

        reference = ScikitRidgeRegression(alpha=1.0, fit_intercept=True)
        reference.fit(x, y)

        assert_array_almost_equal(reference.intercept_, [daal_intercept])

示例#5

0

显示文件

文件： ridge_regression_norm_eq_distributed_mpi.py 项目： anjgola/samples

def testModel(trainingResult):
    """Predict with a trained ridge regression model and print the results.

    Args:
        trainingResult: Training result whose ``training.model`` entry is
            handed to the prediction algorithm.
    """
    # CSV-backed data source for the test set
    csvSource = FileDataSource(testDatasetFileName,
                               DataSourceIface.doAllocateNumericTable,
                               DataSourceIface.doDictionaryFromContext)

    # Unallocated tables for the features and the ground truth; they are
    # loaded together through one merged table
    testData = HomogenNumericTable(
        NUM_FEATURES, 0, NumericTableIface.doNotAllocate)
    testGroundTruth = HomogenNumericTable(
        NUM_DEPENDENT_VARS, 0, NumericTableIface.doNotAllocate)
    combined = MergedNumericTable(testData, testGroundTruth)
    csvSource.loadDataBlock(combined)

    # Prediction algorithm fed with the test features and the trained model
    predictor = prediction.Batch()
    predictor.input.setTable(prediction.data, testData)
    trainedModel = trainingResult.get(training.model)
    predictor.input.setModel(prediction.model, trainedModel)
    outcome = predictor.compute()

    # Report predictions next to the ground truth
    predicted = outcome.get(prediction.prediction)
    printNumericTable(
        predicted,
        "Ridge Regression prediction results: (first 10 rows):",
        10)
    printNumericTable(testGroundTruth, "Ground truth (first 10 rows):", 10)

示例#6

0

显示文件

文件： daal_regression.py 项目： iburyl/Labs

def execute(linearRegressionModelIndex):
    """Train a linear regression 1000 times, time it and print the test RMSE.

    Args:
        linearRegressionModelIndex: 0 announces the normal-equations variant,
            any other value the QR variant; the value is forwarded unchanged
            to ``trainModel``.
    """
    if linearRegressionModelIndex == 0:
        print('\nExecution of Batch_Float64NormEqDense() function:')
    else:
        print('\nExecution of Batch_Float64QrDense() function:')

    trainDependentVariables = HomogenNumericTable(trainY)
    testDependentVariables = HomogenNumericTable(testY)

    trainDataNumTable = HomogenNumericTable(trainX.copy())
    testDataNumTable = HomogenNumericTable(testX.copy())

    # Only the repeated training is timed; table construction is excluded.
    start = time.time()
    for _ in range(1000):
        trainingResult = trainModel(trainDataNumTable,
                                    trainDependentVariables,
                                    linearRegressionModelIndex)
        model = trainingResult.get(training.model)
    end = time.time()

    print('Performance comparison. Time: %s seconds' % (end - start))

    # The model of the last training run is evaluated on the test set.
    predictionResult = predictResults(testDataNumTable, model)
    predicted = predictionResult.get(prediction.prediction)
    print('Linear regression. Test error: {:.2f}'.format(
        RMSE(testDependentVariables, predicted)))

示例#7

0

显示文件

文件： distributed_hdfs_dataset.py 项目： anjgola/samples

def deserializeNumericTable(buffer):
    """Reconstruct a numeric table from its serialized form.

    Args:
        buffer: Serialized data handed to ``OutputDataArchive``.

    Returns:
        A ``HomogenNumericTable`` populated from the archive.
    """
    archive = OutputDataArchive(buffer)
    restoredTable = HomogenNumericTable()
    restoredTable.deserialize(archive)
    return restoredTable

示例#8

0

显示文件

文件： IInput.py 项目： yxoos/h2o4gpu

 def getNumericTable(self, **kwargs):
     """Convert ``self.indata`` to a DAAL numeric table.

     The conversion is chosen by ``self.informat``: ``'numpy'`` wraps the
     array directly, ``'pandas'`` wraps the frame's underlying array and
     ``'csv'`` loads ``self.indata`` through a ``FileDataSource``.

     Args:
         **kwargs: Accepted but not used.

     Returns:
         The numeric table built from the input.

     Raises:
         ValueError: If ``self.informat`` is none of the supported formats.
     """
     if self.informat == 'numpy':
         return HomogenNumericTable(self.indata)
     if self.informat == 'pandas':
         # DataFrame.as_matrix() was removed in pandas 1.0; .values is the
         # equivalent that exists in every pandas version.
         array = self.indata.values
         return HomogenNumericTable(array)
     if self.informat == 'csv':
         # The flag is spelled doDictionaryFromContext ("From", not "Form").
         dataSource = FileDataSource(self.indata,
                                     DataSource.doAllocateNumericTable,
                                     DataSource.doDictionaryFromContext)
         dataSource.loadDataBlock()
         return dataSource.getNumericTable()
     raise ValueError("Cannot identify input type.")

示例#9

0

显示文件

文件： regression.py 项目： zhangzhang10/pydaal-tutorials

    def train(self, data, responses, alpha=1.0):
        """Fit a Ridge Regression model.

        Args:
           data: Training data
           responses: Known responses to the training data
           alpha: Regularization parameter (default 1.0)

        Returns:
            The trained Ridge Regression model object
        """
        # The penalty is passed as a 1x1 numeric table
        penalty_table = HomogenNumericTable(np.array([alpha], ndmin=2))

        trainer = ridge_training.Batch_Float64DefaultDense()
        trainer.input.set(ridge_training.data, data)
        trainer.input.set(ridge_training.dependentVariables, responses)
        trainer.parameter.ridgeParameters = penalty_table

        outcome = trainer.compute()
        trained_model = outcome.get(ridge_training.model)
        return trained_model

示例#10

0

显示文件

文件： implicit_als_csr_distributed_mpi.py 项目： anjgola/samples

def initializeStep1Local():
    """Run the local step 1 of the distributed implicit ALS initialization.

    Side effects on module globals: ``itemStep3LocalInput`` and
    ``userOffset`` are taken from the step's partial result, and
    ``itemsPartialResultLocal`` becomes a step-4 partial result holding the
    local partial model.

    Returns:
        The ``init.outputOfStep1ForStep2`` tables collection of the partial
        result.
    """
    global itemsPartialResultLocal, itemStep3LocalInput, userOffset, usersPartition

    # Configure the step-1 initialization algorithm
    step1Init = init.Distributed(step=step1Local)
    step1Init.parameter.fullNUsers = nUsers
    step1Init.parameter.nFactors = nFactors
    step1Init.parameter.seed += rankId  # seed is offset by this process's rank
    partitionTable = HomogenNumericTable(
        np.array(usersPartition, dtype=np.float64))
    step1Init.parameter.partition = partitionTable

    # Feed the data table and run the initialization
    step1Init.input.set(init.data, transposedDataTable)
    step1Result = step1Init.compute()

    # Publish the pieces that later steps read from module globals
    itemStep3LocalInput = step1Result.getCollection(
        init.outputOfInitForComputeStep3)
    userOffset = step1Result.getCollection(init.offsets, rankId)
    localPartialModel = step1Result.getPartialModel(init.partialModel)

    itemsPartialResultLocal = training.DistributedPartialResultStep4()
    itemsPartialResultLocal.set(training.outputOfStep4ForStep1,
                                localPartialModel)

    return step1Result.getTablesCollection(init.outputOfStep1ForStep2)

示例#11

0

显示文件

文件： test_daal_svd.py 项目： yinxx/h2o4gpu

    def test_svd_daal_vs_sklearn(rows=1000, columns=1000):
        """Compare DAAL's singular values, and optionally its speed, with a reference SVD.

        The reference decomposition is computed with ``np.linalg.svd``. When
        the ``CHECKPERFORMANCE`` environment variable is set, DAAL must not
        be slower than the reference. The singular values of both
        decompositions must agree.
        """
        indata = get_random_array(rows, columns)
        daal_input = HomogenNumericTable(indata)
        algorithm = svd.Batch()
        algorithm.input.set(svd.data, daal_input)

        start_sklearn = time.time()
        _U, s, _Vh = np.linalg.svd(indata, full_matrices=False)
        end_sklearn = time.time()

        start_daal = time.time()
        result = algorithm.compute()
        end_daal = time.time()

        if os.getenv("CHECKPERFORMANCE") is not None:
            assert (end_daal - start_daal <= end_sklearn - start_sklearn)

        # DAAL returns the singular values as a 2-D table; flatten its
        # second dimension to compare with the 1-D reference vector.
        sigma = getNumpyArray(result.get(svd.singularValues))
        _rows, cols = sigma.shape
        d_sigma = sigma.reshape(cols, )

        assert_array_almost_equal(d_sigma, s)

        print("SVD for matrix[{}][{}]".format(rows, columns))
        print("+ Sklearn SVD: {}".format(end_sklearn - start_sklearn))
        # This line reports the DAAL timing, so it is labelled as such.
        print("+ Daal SVD: {}".format(end_daal - start_daal))

示例#12

0

显示文件

文件： test_daal_regression.py 项目： teju85/h2o4gpu

    def test_linear_regression_simple():
        """Fit y = x with QR-based linear regression; check beta and predictions."""
        x = np.array([0., 2., 3.]).reshape(3, 1)

        # One table serves as both the features and the responses
        shared_table = HomogenNumericTable(x)

        trainer = linear_training.Batch(method=linear_training.qrDense)
        trainer.input.set(linear_training.data, shared_table)
        trainer.input.set(linear_training.dependentVariables, shared_table)
        training_result = trainer.compute()
        model = training_result.get(linear_training.model)

        # Expected coefficients: [0, 1]
        expected_beta = np.array([0, 1]).reshape(1, 2)
        actual_beta = getNumpyArray(model.getBeta())
        assert_almost_equal(expected_beta, actual_beta)

        # Predicting on the training inputs must give the inputs back
        predictor = linear_prediction.Batch()
        predictor.input.setModel(linear_prediction.model, model)
        predictor.input.setTable(linear_prediction.data, shared_table)
        prediction_result = predictor.compute()
        predicted = getNumpyArray(
            prediction_result.get(linear_prediction.prediction))
        assert_array_almost_equal(x, predicted)

示例#13

0

显示文件

文件： test_daal_svd.py 项目： yinxx/h2o4gpu

    def test_svd_simple():
        """Run DAAL SVD on a fixed 4x2 matrix; check factor shapes and values."""
        indata = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])
        input_table = HomogenNumericTable(indata)
        n_columns = indata.shape[1]

        svd_alg = svd.Batch(method=svd.defaultDense,
                            leftSingularMatrix=svd.requiredInPackedForm,
                            rightSingularMatrix=svd.requiredInPackedForm)
        svd_alg.input.set(svd.data, input_table)
        factors = svd_alg.compute()

        sigma = getNumpyArray(factors.get(svd.singularValues))
        U = getNumpyArray(factors.get(svd.leftSingularMatrix))
        V = getNumpyArray(factors.get(svd.rightSingularMatrix))

        # Shape checks
        assert sigma.shape[1] == n_columns
        assert indata.shape == U.shape
        assert n_columns == V.shape[0] == V.shape[1]

        # Value checks against reference numbers
        expected_sigma = np.array([[14.269, 0.6268]])
        expected_U = np.array([[-0.152, -0.823],
                               [-0.350, -0.421],
                               [-0.547, -0.020],
                               [-0.745, 0.381]])
        expected_V = np.array([[-0.641, -0.767],
                               [0.767, -0.641]])

        assert_array_almost_equal(expected_sigma, sigma, decimal=4)
        assert_array_almost_equal(expected_U, U, decimal=3)
        assert_array_almost_equal(expected_V, V, decimal=3)

示例#14

0

显示文件

文件： daal_lenet.py 项目： anjgola/samples

def train(trainingData, trainingGroundTruth):
    """Train the network with SGD and return its prediction model.

    Args:
        trainingData: Training input; its dimensions also size the network.
        trainingGroundTruth: Ground truth matching ``trainingData``.

    Returns:
        The result of ``getPredictionModel_Float64()`` on the trained model.
    """
    batchSize = 10
    learningRate = 0.01

    # SGD solver with a single-element float32 learning-rate table
    solver = optimization_solver.sgd.Batch(fptype=np.float32)
    rateArray = np.array([[learningRate]], dtype=np.float32)
    solver.parameter.learningRateSequence = HomogenNumericTable(
        rateArray, ntype=np.float32)
    solver.parameter.batchSize = batchSize
    # Size of dimension 0 divided by the batch size, truncated to an int
    solver.parameter.nIterations = int(
        trainingData.getDimensionSize(0) / solver.parameter.batchSize)

    topology = configureNet()

    net = training.Batch(solver)

    # The network is initialized for one batch: dimension 0 of the data
    # dimensions is replaced by the batch size
    batchDims = trainingData.getDimensions()
    batchDims[0] = batchSize
    net.initialize(batchDims, topology)

    net.input.setInput(training.data, trainingData)
    net.input.setInput(training.groundTruth, trainingGroundTruth)

    trained = net.compute()
    return trained.get(training.model).getPredictionModel_Float64()

示例#15

0

显示文件

文件： regression.py 项目： zzhuuh/h2o4gpu

    def train(self, X, y=None):
        '''
        Fit a ridge regression model and store it on ``self.model``.

        :param X: training data
        :param y: dependent variables (responses)
        :return: Ridge Regression model object
        '''
        # Wrap the inputs as numeric tables
        features = IInput.HomogenousDaalData(X).getNumericTable()
        targets = IInput.HomogenousDaalData(y).getNumericTable()

        trainer = ridge_training.Batch()
        trainer.input.set(ridge_training.data, features)
        trainer.input.set(ridge_training.dependentVariables, targets)

        # NOTE(review): both branches assign True, so the intercept is
        # always enabled whether or not 'intercept' is in self.parameters --
        # confirm the intended behaviour.
        if 'intercept' in self.parameters:
            use_intercept = True
        else:
            use_intercept = True
        trainer.parameter.interceptFlag = use_intercept

        # Ridge penalty as a 1x1 numeric table
        penalty_table = HomogenNumericTable(np.array([self.alpha], ndmin=2))
        trainer.parameter.ridgeParameters = penalty_table

        outcome = trainer.compute()
        self.model = outcome.get(ridge_training.model)
        return self.model

示例#16

0

显示文件

文件： test_daal_regression.py 项目： teju85/h2o4gpu

    def get_daal_prediction(x=np.array([1, 2, 3]), y=np.array([1, 2, 3])):
        """Fit DAAL linear regression on (x, y) and return its predictions for x.

        The predictions are asserted to match ``y`` (via
        ``assert_array_almost_equal``) before they are returned.
        """
        features_nt = HomogenNumericTable(x)
        responses_nt = HomogenNumericTable(y)

        trainer = linear_training.Batch()
        trainer.input.set(linear_training.data, features_nt)
        trainer.input.set(linear_training.dependentVariables, responses_nt)
        training_result = trainer.compute()
        fitted_model = training_result.get(linear_training.model)

        predictor = linear_prediction.Batch()
        predictor.input.setModel(linear_prediction.model, fitted_model)
        predictor.input.setTable(linear_prediction.data, features_nt)
        prediction_result = predictor.compute()

        np_predicted = getNumpyArray(
            prediction_result.get(linear_prediction.prediction))
        # The fit must reproduce the dependent variable it was trained on
        assert_array_almost_equal(y, np_predicted)
        return np_predicted

示例#17

0

显示文件

文件： test_daal_normalization.py 项目： yinxx/h2o4gpu

    def test_zscore_multicolumns():
        """Compare ``z_score`` on a random 10x3 matrix with ``stats.zscore``."""
        input_ = np.random.rand(10, 3)

        # Reference: column-wise z-score with ddof=1
        expected = stats.zscore(input_, axis=0, ddof=1)

        daal_table = HomogenNumericTable(input_)
        actual = getNumpyArray(z_score(daal_table))

        assert_array_almost_equal(expected, actual)

示例#18

0

显示文件

文件： helpers.py 项目： jqk6/daal-extensions

def get_learning_rate(learning_rate):
	"""Wraps a learning rate in a numeric table for usage in Intel DAAL solvers.

	Args:
		learning_rate (:obj:`float`): Learning rate.

	Returns:
		:py:class:`daal.data_management.HomogenNumericTable`: Table wrapping the learning rate.
	"""
	wrapped_rate = HomogenNumericTable(
		1, 1, NumericTableIface.doAllocate, learning_rate)
	return wrapped_rate

示例#19

0

显示文件

def getOptimizationSolver(learningRate, batchSize):
    """Construct an SGD optimization solver with the given learning rate.

    Args:
        learningRate: Learning rate stored in the solver's learning-rate
            sequence table.
        batchSize: Batch size assigned to the solver.

    Returns:
        The configured ``optimization_solver.sgd.Batch`` algorithm.
    """
    # Create 1 x 1 NumericTable to store learning rate
    learningRateSequence = HomogenNumericTable(
        1, 1, NumericTableIface.doAllocate, learningRate, ntype=np.float32)

    # Create SGD optimization solver and set learning rate.
    # Algorithm factories select their precision with ``fptype``; ``ntype``
    # is the keyword of numeric tables, so it is used only for the table
    # above.
    optalg = optimization_solver.sgd.Batch(fptype=np.float32)
    optalg.parameter.learningRateSequence = learningRateSequence
    optalg.parameter.batchSize = batchSize
    return optalg

示例#20

0

显示文件

文件： neural_net_dense_distributed_mpi.py 项目： anjgola/samples

def trainModel(trainingData, trainingGroundTruth, netLocal, netMaster):
    """Train the neural network batch by batch across the MPI processes.

    For every batch each process computes on ``netLocal``; the partial
    results are gathered, the process with rank ``MPI_ROOT`` feeds them to
    ``netMaster`` and the resulting weights and biases are broadcast back
    into every local input model.

    Args:
        trainingData: Training input; sub-tensors of ``batchSizeLocal``
            samples are taken from it.
        trainingGroundTruth: Ground truth matching ``trainingData``.
        netLocal: Local training algorithm.
        netMaster: Master training algorithm (computed on the root only).

    Returns:
        The float32 prediction model on the root process, ``None`` on every
        other process.
    """

    predictionModel = None
    partialResultsArchLength = 0
    partialResultLocalBuffer = np.array([], dtype=np.uint8)
    partialResultMasterBuffer = np.array([], dtype=np.uint8)

    # Run the neural network training
    nSamples = trainingData.getDimensionSize(0)
    # Only full batches are processed; a trailing partial batch is skipped
    for i in range(0, nSamples - batchSizeLocal + 1, batchSizeLocal):
        # Compute weights and biases for the batch of inputs on local nodes
        # Pass a training data set and dependent values to the algorithm
        netLocal.input.setInput(
            training.data, getNextSubtensor(trainingData, i, batchSizeLocal))
        netLocal.input.setInput(
            training.groundTruth,
            getNextSubtensor(trainingGroundTruth, i, batchSizeLocal))

        # Compute weights and biases derivatives on local node
        pres = netLocal.compute()

        partialResults = [0] * nNodes

        # Presumably fills partialResults in place with one entry per node
        # -- verify against gatherPartialResultsFromNodes
        gatherPartialResultsFromNodes(pres, partialResults,
                                      partialResultsArchLength,
                                      partialResultLocalBuffer,
                                      partialResultMasterBuffer)

        # Empty placeholder; replaced with the real table on the root below
        wb = HomogenNumericTable()
        if rankId == MPI_ROOT:
            for node in range(nNodes):
                # Pass computed weights and biases derivatives to the master algorithm
                netMaster.input.add(training.partialResults, node,
                                    partialResults[node])

            # Update weights and biases on master node
            pres = netMaster.compute()
            wbModel = pres.get(training.resultFromMaster).get(training.model)
            wb = wbModel.getWeightsAndBiases()

        # Broadcast updated weights and biases to nodes
        wbLocal = broadcastWeightsAndBiasesToNodes(wb)
        netLocal.input.getStep1LocalInput(
            training.inputModel).setWeightsAndBiases(wbLocal)

    if rankId == MPI_ROOT:
        # Finalize neural network training on the master node
        res = netMaster.finalizeCompute()

        # Retrieve training and prediction models of the neural network
        trModel = res.get(training.model)
        predictionModel = trModel.getPredictionModel_Float32()

    return predictionModel

示例#21

0

显示文件

def getNumericTableFromCSV(csvFileName, Rows='All'):
    """Load a CSV file into a numeric table.

    Args:
        csvFileName: Path of the CSV file.
        Rows: ``'All'`` to load the whole file, or a non-string value
            (a row count) that is forwarded to ``loadDataBlock``.

    Returns:
        The ``HomogenNumericTable`` filled by the data source.

    Raises:
        SystemError: If ``Rows`` is a string other than ``'All'``.
    """
    loadEverything = isinstance(Rows, str)
    if loadEverything and Rows != 'All':
        # Reject the bad argument before the data source is created, and
        # carry the explanation in the exception as well as the warning.
        message = 'Type error in "Rows" arguments, Can be only int'
        warnings.warn(message)
        raise SystemError(message)

    dataSource = FileDataSource(csvFileName,
                                DataSourceIface.doAllocateNumericTable,
                                DataSourceIface.doDictionaryFromContext)
    nT = HomogenNumericTable()
    if loadEverything:
        dataSource.loadDataBlock(nT)
    else:
        dataSource.loadDataBlock(Rows, nT)
    return nT

示例#22

0

显示文件

    def test_ridge_regression_simple():
        """Fit ridge regression (penalty 1.0) on y = x; check beta and predictions."""
        x = np.array([0., 2., 3.]).reshape(3, 1)

        # One table serves as both the features and the responses
        shared_table = HomogenNumericTable(x)

        trainer = ridge_training.Batch()
        trainer.input.set(ridge_training.data, shared_table)
        trainer.input.set(ridge_training.dependentVariables, shared_table)

        # Ridge penalty as a 1x1 numeric table
        penalty_table = HomogenNumericTable(np.array([1.0], ndmin=2))
        trainer.parameter.ridgeParameters = penalty_table

        training_result = trainer.compute()
        model = training_result.get(ridge_training.model)
        actual_beta = getNumpyArray(model.getBeta())

        expected_beta = np.array([0.294118, 0.823529]).reshape(1, 2)
        assert_array_almost_equal(expected_beta, actual_beta)

        # Predictions only need to match the inputs to 0 decimals
        predictor = ridge_prediction.Batch_Float64DefaultDense()
        predictor.input.setModel(ridge_prediction.model, model)
        predictor.input.setTable(ridge_prediction.data, shared_table)

        prediction_result = predictor.compute()
        predicted = getNumpyArray(
            prediction_result.get(ridge_prediction.prediction))
        assert_array_almost_equal(x, predicted, decimal=0)

示例#23

0

显示文件

文件： test_daal_regression.py 项目： yinxx/h2o4gpu

    def test_coeff_size(rows=10, columns=9):
        '''
        number of beta coefficients (with intercept flag on)
        is the same number as size of data sample
        '''
        inout = get_random_array(rows, columns)
        test_overfitting(rows, columns)
        x = inout[0]
        y = inout[1]

        ntX = HomogenNumericTable(x)
        ntY = HomogenNumericTable(y)

        lr_train = linear_training.Batch()
        lr_train.input.set(linear_training.data, ntX)
        lr_train.input.set(linear_training.dependentVariables, ntY)
        result = lr_train.compute()
        model = result.get(linear_training.model)
        beta_coeff = model.getBeta()
        np_beta = getNumpyArray(beta_coeff)

        assert y.transpose().shape == np_beta.shape, "Dependent variable size must have\

示例#24

0

显示文件

def _resolveIndexRange(spec, upperBound, argName):
    """Translate a Rows/Columns selector into a ``(first, last)`` pair.

    ``'All'`` selects ``[0, upperBound)``; a list gives the first index and,
    when it has exactly two items, a last index capped at ``upperBound``;
    any other non-string value is used as the last index with first index 0.
    """
    import warnings

    if isinstance(spec, str):
        if spec == 'All':
            return 0, upperBound
        warnings.warn(
            'Type error in "%s" arguments, Can be only int/list type'
            % argName)
        raise SystemExit
    if isinstance(spec, list):
        last = min(spec[1], upperBound) if len(spec) == 2 else upperBound
        return spec[0], last
    return 0, spec


def getBlockOfNumericTable(nT, Rows='All', Columns='All'):
    """Extract a rectangular block of a numeric table as a new table.

    Args:
        nT: Source numeric table.
        Rows: ``'All'``, a last-row index, or a list ``[first]`` /
            ``[first, last]``.
        Columns: ``'All'``, a last-column index, or a list ``[first]`` /
            ``[first, last]``.

    Returns:
        A ``HomogenNumericTable`` built from the selected rows and columns.

    Raises:
        SystemExit: If ``Rows`` or ``Columns`` is a string other than
            ``'All'``.
    """
    from daal.data_management import HomogenNumericTable, \
    HomogenNumericTable_Float64, MergedNumericTable, readOnly, \
    BlockDescriptor

    # Get First and Last Row indexes
    firstRow, lastRow = _resolveIndexRange(
        Rows, nT.getNumberOfRows(), "Rows")

    # Get First and Last Column indexes
    nStartDim, nEndDim = _resolveIndexRange(
        Columns, nT.getNumberOfColumns(), "Columns")

    # Retrieve each selected column between the first and last row and
    # collect the columns in a merged table
    merged = MergedNumericTable()
    for idx in range(nStartDim, nEndDim):
        block = BlockDescriptor()
        nT.getBlockOfColumnValues(idx, firstRow, (lastRow - firstRow),
                                  readOnly, block)
        merged.addNumericTable(HomogenNumericTable_Float64(block.getArray()))
        nT.releaseBlockOfColumnValues(block)

    # Read the merged columns back as one block of rows and wrap it in a
    # homogeneous table
    block = BlockDescriptor()
    merged.getBlockOfRows(0, merged.getNumberOfRows(), readOnly, block)
    return HomogenNumericTable(block.getArray())

示例#25

0

显示文件

文件： preprocessing.py 项目： kartik173/Intel-Hackathon

def callback(recognizer, audio):
    """Recognize speech, classify it and append the outcome to ``data.txt``.

    The recognized text is reduced to stemmed words that are in ``tokens``
    and are not English stop words. Their per-token counts are classified
    with ``daal_svm.predict``; the label is printed, appended to ``pred``
    and written to ``data.txt``. If the text contains "bye", a closing
    record is written and the process exits.

    Args:
        recognizer: Recognizer instance passed by the caller.
            NOTE(review): unused -- recognition goes through the
            module-level ``r``; confirm that this is intended.
        audio: Audio data to recognize.
    """
    # received audio data, now we'll recognize it using Google Speech Recognition
    try:
        text = r.recognize_google(audio)
        print("you said: " + text)

        if "bye" in text:
            # The context manager flushes and closes the file before exiting.
            with open('data.txt', 'a+') as f:
                f.write("bye" + "," + time.ctime() + "," + "bye")
            sys.exit()

        # Keep letters only, lower-case and tokenize
        words = word_tokenize(re.sub('[^a-zA-Z]', ' ', text).lower())

        # Build the stop-word set once instead of once per word
        english_stopwords = set(stopwords.words('english'))
        ps = PorterStemmer()
        stems = [ps.stem(word) for word in words
                 if word in tokens and word not in english_stopwords]

        if stems:
            # One row holding the count of every token among the stems
            counts = [[stems.count(token)] for token in tokens]
            iar = np.array(counts).T

            reviewData = HomogenNumericTable(iar)
            pre = daal_svm.predict(trainingResult, reviewData)
            l = getArrayFromNT(pre)
            label = outcome[int(l[0][0])]

            print(label)
            pred.append([text, time.ctime(), label])

            with open('data.txt', 'a+') as f:
                f.write(text + "," + time.ctime() + "," + label + "\r\n")

    except sr.UnknownValueError:
        print("Google Speech Recognition could not understand audio")
    except sr.RequestError as e:
        print("Could not request results from Google Speech Recognition service; {0}".format(e))

示例#26

0

显示文件

文件： test_daal_regression.py 项目： teju85/h2o4gpu

    def test_intercept_flag(rows=10, columns=9):
        """Check that DAAL's linear-regression intercept agrees with scikit-learn's.

        Both models are fitted on the same random sample; element [0, 0] of
        DAAL's beta table is compared against ``intercept_`` of the
        scikit-learn estimator.
        """
        inout = get_random_array(rows, columns)
        test_overfitting(rows, columns)
        x = inout[0]
        y = inout[1]

        ntX = HomogenNumericTable(x)
        ntY = HomogenNumericTable(y)

        lr_train = linear_training.Batch()
        lr_train.input.set(linear_training.data, ntX)
        lr_train.input.set(linear_training.dependentVariables, ntY)
        result = lr_train.compute()
        model = result.get(linear_training.model)
        beta_coeff = model.getBeta()
        np_beta = getNumpyArray(beta_coeff)
        daal_intercept = np_beta[0, 0]

        # Import from the public package: the private module
        # sklearn.linear_model.base no longer exists in current
        # scikit-learn releases.
        from sklearn.linear_model import LinearRegression as ScikitLinearRegression
        regression = ScikitLinearRegression()
        regression.fit(x, y)

        scikit_intercept = regression.intercept_
        assert_array_almost_equal(scikit_intercept, [daal_intercept])

示例#27

0

显示文件

文件： neural_net_dense_distributed_mpi.py 项目： anjgola/samples

def broadcastWeightsAndBiasesToNodes(wb):
    """Send the root's weights-and-biases table to every process.

    The root serializes ``wb`` into a byte array, the array is broadcast
    with ``comm.bcast`` and each process deserializes it into a fresh
    float32 table.

    Args:
        wb: Weights-and-biases table; only serialized on the root process.

    Returns:
        The deserialized table, or an empty ``HomogenNumericTable`` when the
        root is handed a falsy ``wb``.
    """
    serialized = None

    if rankId == MPI_ROOT:
        if not wb:
            # Weights and biases table should be valid and not NULL on master
            # NOTE(review): this return happens before the broadcast below
            # -- confirm how the other processes are expected to proceed.
            return HomogenNumericTable()

        # Serialize the table into a byte array on the root
        archive = InputDataArchive()
        wb.serialize(archive)
        serialized = np.zeros(archive.getSizeOfArchive(), dtype=np.uint8)
        archive.copyArchiveToArray(serialized)

    # Every process takes part in the broadcast
    serialized = comm.bcast(serialized)

    # Rebuild the table from the received bytes
    receivedArchive = OutputDataArchive(serialized)
    wbLocal = HomogenNumericTable(ntype=np.float32)
    wbLocal.deserialize(receivedArchive)

    return wbLocal

示例#28

0

显示文件

文件： __init__.py 项目： iburyl/Labs

def computeOutBlocks(nBlocks, dataBlock, dataBlockPartition):
    """Collect, per partition block, the rows that have an entry in it.

    A row belongs to block ``k`` when at least one of its column indices
    (converted from one-based to zero-based) lies in
    ``[dataBlockPartition[k], dataBlockPartition[k + 1])``.

    Args:
        nBlocks: Number of partition blocks.
        dataBlock: Table providing ``getNumberOfRows()`` and ``getArrays()``
            (the latter yielding values, column indices and row offsets).
        dataBlockPartition: Block boundaries with ``nBlocks + 1`` entries.

    Returns:
        A ``KeyValueDataCollection`` mapping each block index to a
        one-column integer table of the matching row indices.
    """
    nRows = dataBlock.getNumberOfRows()
    blockIdFlags = np.zeros(nRows * nBlocks, dtype=np.uint8)

    (_, colIndices, rowOffsets) = dataBlock.getArrays()

    # Row offsets and column indices are one-based, hence the "- 1" shifts.
    for i in range(nRows):
        for j in range(int(rowOffsets[i] - 1), int(rowOffsets[i + 1] - 1)):
            column = colIndices[j] - 1
            for k in range(nBlocks):
                if dataBlockPartition[k] <= column < dataBlockPartition[k + 1]:
                    blockIdFlags[k * nRows + i] = 1

    result = KeyValueDataCollection()

    for i in range(nBlocks):
        blockFlags = blockIdFlags[i * nRows:(i + 1) * nRows]
        # Count with count_nonzero rather than summing uint8 scalars: under
        # NumPy 2 promotion rules such a running sum stays uint8 and wraps
        # once more than 255 rows are flagged.
        nNotNull = int(np.count_nonzero(blockFlags))

        indicesTable = HomogenNumericTable(1,
                                           nNotNull,
                                           NumericTableIface.doAllocate,
                                           ntype=np.intc)
        indices = indicesTable.getArray()

        # flatnonzero yields the flagged row numbers in ascending order
        for indexId, row in enumerate(np.flatnonzero(blockFlags)):
            indices[indexId] = int(row)
        result[i] = indicesTable

    return result

示例#29

0

显示文件

文件： daal_mpi_ridge.py 项目： iburyl/Labs

def trainModel():
    """Train ridge regression (normal equations) across the MPI processes.

    Each process runs the local step on its share of the training files and
    feeds the partial models into its own master algorithm; the serialized
    per-process results are gathered, and the process with rank ``MPI_ROOT``
    combines them and stores the final result in the global
    ``trainingResult``.
    """
    global trainingResult
    masterAlgorithm = training.Distributed_Step2MasterFloat64NormEqDense()

    # Files are dealt out round-robin: this process takes every
    # comm_size-th file starting at its own rank
    for filenameIndex in range(rankId, len(trainDatasetFileNames), comm_size):
        trainDataSource = FileDataSource(
            trainDatasetFileNames[filenameIndex],
            DataSourceIface.notAllocateNumericTable,
            DataSourceIface.doDictionaryFromContext)
        # Unallocated feature and response tables, filled together through
        # one merged table
        trainData = HomogenNumericTable(nFeatures, 0,
                                        NumericTableIface.notAllocate)
        trainDependentVariables = HomogenNumericTable(
            nDependentVariables, 0, NumericTableIface.notAllocate)
        mergedData = MergedNumericTable(trainData, trainDependentVariables)
        trainDataSource.loadDataBlock(mergedData)

        # Local step on this file; its partial model goes to this process's
        # master algorithm
        localAlgorithm = training.Distributed_Step1LocalFloat64NormEqDense()
        localAlgorithm.input.set(training.data, trainData)
        localAlgorithm.input.set(training.dependentVariables,
                                 trainDependentVariables)
        pres = localAlgorithm.compute()
        masterAlgorithm.input.add(training.partialModels, pres)

        # Release the file's data before the next one is loaded
        mergedData.freeDataMemory()
        trainData.freeDataMemory()
        trainDependentVariables.freeDataMemory()

    # Combine this process's partial models and serialize the outcome
    pres = masterAlgorithm.compute()
    dataArch = InputDataArchive()
    pres.serialize(dataArch)
    nodeResults = dataArch.getArchiveAsArray()
    # Presumably an mpi4py gather that delivers the list to the root only
    # -- verify against the definition of comm
    serializedData = comm.gather(nodeResults)

    if rankId == MPI_ROOT:
        print("Number of processes is %d." % (len(serializedData)))
        # A fresh master algorithm merges the per-process partial results
        masterAlgorithm = training.Distributed_Step2MasterFloat64NormEqDense()

        for i in range(comm_size):
            dataArch = OutputDataArchive(serializedData[i])
            dataForStep2FromStep1 = training.PartialResult()
            dataForStep2FromStep1.deserialize(dataArch)
            masterAlgorithm.input.add(training.partialModels,
                                      dataForStep2FromStep1)
        masterAlgorithm.compute()
        trainingResult = masterAlgorithm.finalizeCompute()

示例#30

0

显示文件

文件： neural_net_dense_distributed_mpi.py 项目： anjgola/samples

def initializeNetwork():
    """Load this process's training data and set up the distributed training.

    Returns:
        A tuple ``(trainingData, trainingGroundTruth, netLocal, netMaster)``:
        the two tensors read from this rank's CSV files, the local training
        algorithm with its input model set, and the master training
        algorithm (initialized on the root process only).
    """

    # Read training data set from a .csv file and create tensors to store input data
    trainingData = readTensorFromCSV(trainDatasetFileNames[rankId])
    trainingGroundTruth = readTensorFromCSV(trainGroundTruthFileNames[rankId],
                                            True)

    # Create AdaGrad optimization solver algorithm.
    # The precision keyword of algorithm factories is ``fptype``; the
    # previous spelling "ntpye" is not a keyword of the factory.
    solver = optimization_solver.adagrad.Batch(fptype=np.float32)

    # Set learning rate for the optimization solver used in the neural network
    learningRate = 0.001
    solver.parameter.learningRate = HomogenNumericTable(
        1, 1, NumericTableIface.doAllocate, learningRate)
    solver.parameter.batchSize = batchSizeLocal
    solver.parameter.optionalResultRequired = True
    trainingModel = None

    # Algorithms to train neural network
    netLocal = training.Distributed(step1Local)
    netMaster = training.Distributed(step2Master, solver)

    # The network is sized for one local batch: dimension 0 of the data
    # dimensions is replaced by the local batch size
    sampleSize = trainingData.getDimensions()
    sampleSize[0] = batchSizeLocal

    # Configure the neural network topology
    topology = configureNet()

    if rankId == MPI_ROOT:

        # Set the optimization solver for the neural network training
        netMaster.parameter.optimizationSolver = solver

        # Initialize the neural network on master node
        netMaster.initialize(sampleSize, topology)

        trainingModel = netMaster.getResult().get(training.model)
    else:
        # Configure the neural network on local nodes
        trainingModel = training.Model()
        trainingModel.initialize_Float32(sampleSize, topology)

    # Pass a model from master node to the algorithms on local nodes
    netLocal.input.setStep1LocalInput(training.inputModel, trainingModel)

    return (trainingData, trainingGroundTruth, netLocal, netMaster)