def printTensor3d(dataTable, message="", nFirstDim=0, nSecondDim=0, interval=10):
    """Print a tensor slice by slice along its first dimension.

    Args:
        dataTable: Object exposing ``getDimensions``, ``getSubtensor`` and
            ``releaseSubtensor`` (presumably a DAAL tensor -- confirm).
        message: Text printed once before the slices.
        nFirstDim: Maximum number of entries of the first dimension to print;
            ``0`` prints all of them.
        nSecondDim: Maximum number of entries of the second dimension to
            print; ``0`` prints all of them.
        interval: Forwarded unchanged to ``printArray``.
    """
    shape = dataTable.getDimensions()
    firstExtent = int(shape[0])
    secondExtent = int(shape[1])

    # A limit of 0 means "no limit"; any other value is clamped to the extent.
    nFirstDim = min(firstExtent, nFirstDim) if nFirstDim != 0 else firstExtent
    nSecondDim = min(secondExtent, nSecondDim) if nSecondDim != 0 else secondExtent

    descriptor = SubtensorDescriptor()

    print(message)
    for sliceIndex in range(nFirstDim):
        # Fix the first dimension at sliceIndex and read nSecondDim entries.
        dataTable.getSubtensor([sliceIndex], 0, nSecondDim, readOnly, descriptor)
        thirdExtent = int(descriptor.getSize() / nSecondDim)
        printArray(descriptor.getArray(), thirdExtent, int(nSecondDim), thirdExtent, "", interval)
        dataTable.releaseSubtensor(descriptor)
def printTensor(dataTable, message="", nPrintedRows=0, nPrintedCols=0, interval=10):
    """Print the leading rows of a tensor through ``printArray``.

    Args:
        dataTable: Object exposing ``getDimensions``, ``getSubtensor`` and
            ``releaseSubtensor`` (presumably a DAAL tensor -- confirm).
        message: Forwarded to ``printArray``.
        nPrintedRows: Maximum number of rows to print; ``0`` prints all rows.
        nPrintedCols: Maximum number of columns to print; ``0`` prints all
            columns.
        interval: Forwarded unchanged to ``printArray``.
    """
    rowCount = int(dataTable.getDimensions()[0])
    # A limit of 0 means "no limit"; any other value is clamped to the extent.
    nPrintedRows = min(rowCount, nPrintedRows) if nPrintedRows != 0 else rowCount

    descriptor = SubtensorDescriptor()
    dataTable.getSubtensor([], 0, nPrintedRows, readOnly, descriptor)

    # Everything behind the first dimension is treated as one flat row.
    colCount = int(descriptor.getSize() / nPrintedRows)
    nPrintedCols = min(colCount, nPrintedCols) if nPrintedCols != 0 else colCount

    printArray(descriptor.getArray(), int(nPrintedCols), int(nPrintedRows), colCount, message, interval)
    dataTable.releaseSubtensor(descriptor)
def _readBatchFromDataset(self, in_file, counter):
    """Read the batch of images that corresponds to batch index ``counter``.

    Args:
        in_file: Seekable binary file object holding the image data.
        counter: Zero-based batch index used to compute the file offset.

    Returns:
        The tensor created by ``allocateTensor`` with the batch stored as
        ``np.float32`` values.
    """
    elementsPerBatch = (self._imagesInBatch * self._imageChannels *
                        self._imageWidth * self._imageHeight)
    in_file.seek(self._imagesPosition + elementsPerBatch * counter)

    batchTensor = allocateTensor(np.float32, self._imagesInBatch, self._imageChannels,
                                 self._imageHeight, self._imageWidth)
    elementCount = batchTensor.getSize()

    descriptor = SubtensorDescriptor(ntype=np.float32)
    batchTensor.getSubtensor([], 0, self._imagesInBatch, writeOnly, descriptor)
    destination = descriptor.getArray()

    # Each element is stored in the file as one unsigned byte ('B').
    rawBytes = in_file.read(elementCount)
    pixels = np.array(struct.unpack('B' * elementCount, rawBytes), dtype=np.float32)

    # Copy value by value in nditer order into the writable block.
    for cell, value in zip(np.nditer(destination, op_flags=['readwrite']), pixels):
        cell[...] = value

    batchTensor.releaseSubtensor(descriptor)
    return batchTensor
def __init__(self, tensor, read_type=np.float32):
    """Acquire a read-only block spanning the whole first dimension of ``tensor``.

    Args:
        tensor: Instance of ``Tensor`` to read from.
        read_type: Numpy type handed to ``SubtensorDescriptor`` as ``ntype``.

    Raises:
        ValueError: If ``tensor`` is not an instance of ``Tensor``.
    """
    if not isinstance(tensor, Tensor):
        raise ValueError(PYDAAL_NOT_A_TENSOR % type(tensor))

    self.tensor = tensor
    self.block = SubtensorDescriptor(ntype=read_type)

    # Request every entry of the first dimension, starting at index 0.
    leading_extent = tensor.getDimensionSize(0)
    self.tensor.getSubtensor([], 0, leading_extent, readOnly, self.block)
def findClasses(dataTable):
    """Return, for every row of ``dataTable``, the index of its largest value.

    Args:
        dataTable: Object exposing ``getDimensions``, ``getSubtensor`` and
            ``releaseSubtensor`` (presumably a DAAL tensor -- confirm).

    Returns:
        numpy.ndarray: Result of ``np.argmax`` along axis 1 of the data viewed
        as a ``(rows, columns)`` matrix.
    """
    rowCount = int(dataTable.getDimensions()[0])

    descriptor = SubtensorDescriptor()
    dataTable.getSubtensor([], 0, rowCount, readOnly, descriptor)

    # Everything behind the first dimension is folded into the column axis.
    colCount = int(descriptor.getSize() / rowCount)
    scores = np.reshape(descriptor.getArray().flatten(), (rowCount, colCount))
    winners = np.argmax(scores, axis=1)

    dataTable.releaseSubtensor(descriptor)
    return winners
def predict(self, data, batch_size=None, rebuild=True):
    """Predicts labels based on a prediction model.

    Supported notation is ``with net.predict(...) as predictions:``

    Args:
        data (:obj:`daal.data_management.Tensor` or :obj:`numpy.ndarray`): Prediction data.
        batch_size (:obj:`int`): Batch size for processing prediction data.
            Falsy values and values larger than the first dimension of the
            data fall back to the size of that first dimension.
        rebuild (:obj:`bool`): Control parameter to force rebuild of the model.

    Returns:
        :py:class:`pydaalcontrib.nn.DAALNet`: DAAL network with the evaluated predictions.

    Raises:
        ValueError: If the provided ``data`` are of the wrong type.
    """
    if isinstance(data, np.ndarray):
        _data = HomogenTensor(data.copy(), ntype=data.dtype)
        read_type = data.dtype
    elif isinstance(data, Tensor):
        # Previously this branch left ``_data`` unbound, so a Tensor input
        # crashed on the very next statement.
        _data = data
        # NOTE(review): no numpy dtype is read from a Tensor here; float32 is
        # assumed because the prediction model below is the Float32 one -- confirm.
        read_type = np.float32
    else:
        raise ValueError('Data is not of numpy.ndarray or Tensor type!')

    if not batch_size or batch_size > _data.getDimensionSize(0):
        batch_size = _data.getDimensionSize(0)

    if rebuild and self.do_rebuild:
        #TODO: refactor set rebuild=False once memory allocation is fixed on prediction in Intel DAAL 2018
        parameter = prediction.Parameter()
        parameter.batchSize = batch_size
        self.do_rebuild = False
        rebuild_args = {
            'data_dims': [batch_size] + _data.getDimensions()[1:],
            'parameter': parameter
        }
        self.model = self.build_model(self.descriptor, False,
                                      rebuild=rebuild_args, **self.build_args)
    elif 'train_result' in self.__dict__:
        self.model = self.train_result.get(
            training.model).getPredictionModel_Float32()

    net = prediction.Batch()
    net.parameter.batchSize = batch_size
    net.input.setModelInput(prediction.model, self.model)
    net.input.setTensorInput(prediction.data, _data)

    # The block stays acquired after returning; it is released by
    # ``__exit__`` / ``get_predictions`` of the enclosing class.
    self.predictions = SubtensorDescriptor(ntype=read_type)
    self.predict_result = net.compute().getResult(prediction.prediction)
    self.predict_result.getSubtensor(
        [], 0, self.predict_result.getDimensionSize(0), readOnly, self.predictions)

    return self
def getNextSubtensor(inputTensor, startPos, nElements):
    """Copy a range of the first dimension of ``inputTensor`` into a new tensor.

    Args:
        inputTensor: Object exposing ``getSubtensor`` and ``releaseSubtensor``
            (presumably a DAAL tensor -- confirm).
        startPos: Index along the first dimension where the range starts.
        nElements: Number of entries of the first dimension to copy.

    Returns:
        HomogenTensor: New ``np.float32`` tensor built from a copy of the
        requested range.
    """
    descriptor = SubtensorDescriptor(ntype=np.float32)
    inputTensor.getSubtensor([], startPos, nElements, readOnly, descriptor)
    try:
        # Copy before releasing so the result does not depend on the block.
        copied = np.array(descriptor.getArray(), copy=True, dtype=np.float32)
    finally:
        # Release the block even when the copy fails.
        inputTensor.releaseSubtensor(descriptor)
    return HomogenTensor(copied, ntype=np.float32)
class DataReader:
    """Context manager that exposes the content of an Intel DAAL tensor.

    Supported notation is ``with DataReader(...) as result:``. The block is
    acquired in the constructor and released when the ``with`` body exits.

    Args:
        tensor (:obj:`daal.data_management.Tensor`): Tensor to read from.
        read_type (:obj:`numpy.dtype`, optional): Numpy type for the result.

    Raises:
        ValueError: If ``tensor`` is not a :obj:`daal.data_management.Tensor`.
    """

    def __init__(self, tensor, read_type=np.float32):
        if not isinstance(tensor, Tensor):
            raise ValueError(PYDAAL_NOT_A_TENSOR % type(tensor))

        self.tensor = tensor
        self.block = SubtensorDescriptor(ntype=read_type)

        # Request every entry of the first dimension, starting at index 0.
        leading_extent = tensor.getDimensionSize(0)
        self.tensor.getSubtensor([], 0, leading_extent, readOnly, self.block)

    def __enter__(self):
        """Return the array of the block acquired in the constructor."""
        return self.block.getArray()

    def __exit__(self, type, value, traceback):
        """Release the block; exceptions from the ``with`` body propagate."""
        self.tensor.releaseSubtensor(self.block)
def printTensors(dataTable1, dataTable2, title1="", title2="", message="", nPrintedRows=0, interval=15):
    """Print the leading rows of two tensors side by side.

    Values of the first tensor are printed with no decimals in columns of
    width ``interval``; values of the second tensor are printed with three
    decimals in columns of width ``int(interval / 2)``.

    Args:
        dataTable1: First tensor-like object.
        dataTable2: Second tensor-like object.
        title1: Header printed above the columns of the first tensor.
        title2: Header printed above the columns of the second tensor.
        message: Text printed before the header line.
        nPrintedRows: Maximum number of rows to print; ``0`` prints all rows.
        interval: Column width used for the first tensor.
    """
    rowsFirst = int(dataTable1.getDimensions()[0])
    # A limit of 0 means "no limit"; any other value is clamped to the extent.
    nPrintedRows = min(rowsFirst, nPrintedRows) if nPrintedRows != 0 else rowsFirst

    descFirst = SubtensorDescriptor()
    dataTable1.getSubtensor([], 0, nPrintedRows, readOnly, descFirst)
    colsFirst = int(descFirst.getSize() / nPrintedRows)

    rowsSecond = int(dataTable2.getDimensions()[0])
    # Clamp again so that no more rows are printed than the second tensor has.
    nPrintedRows = min(rowsSecond, nPrintedRows) if nPrintedRows != 0 else rowsSecond

    descSecond = SubtensorDescriptor()
    dataTable2.getSubtensor([], 0, nPrintedRows, readOnly, descSecond)
    colsSecond = int(descSecond.getSize() / nPrintedRows)

    flatFirst = descFirst.getArray().flatten()
    flatSecond = descSecond.getArray().flatten()

    print(message)
    print("{:<{width}}".format(title1, width=(interval * colsFirst)), end='')
    print("{:<{width}}".format(title2, width=(interval * colsSecond)))

    for row in range(nPrintedRows):
        offsetFirst = row * colsFirst
        for col in range(colsFirst):
            print("{v:<{width}.0f}".format(v=flatFirst[offsetFirst + col], width=interval), end='')
        offsetSecond = row * colsSecond
        for col in range(colsSecond):
            print("{:<{width}.3f}".format(flatSecond[offsetSecond + col], width=int(interval / 2)), end='')
        print()
    print()

    dataTable1.releaseSubtensor(descFirst)
    dataTable2.releaseSubtensor(descSecond)
def _readGroundTruthFromDataset(self, in_file, counter):
    """Read the batch of labels that corresponds to batch index ``counter``.

    Args:
        in_file: Seekable binary file object holding the labels.
        counter: Zero-based batch index used to compute the file offset.

    Returns:
        The ``np.intc`` tensor created by ``allocateTensor`` with one label
        per image of the batch.
    """
    # 4 bytes are skipped per label (presumably the size of one DWORD as
    # consumed by ``_readDWORD`` -- confirm).
    labelBytesPerBatch = self._imagesInBatch * 4
    in_file.seek(self._classesPosition + labelBytesPerBatch * counter)

    labelsTensor = allocateTensor(np.intc, self._imagesInBatch, 1)
    descriptor = SubtensorDescriptor(ntype=np.intc)
    labelsTensor.getSubtensor([], 0, self._imagesInBatch, writeOnly, descriptor)

    # Fill the writable block with one value per ``_readDWORD`` call.
    for slot in np.nditer(descriptor.getArray(), op_flags=['readwrite']):
        slot[...] = int(self._readDWORD(in_file))

    labelsTensor.releaseSubtensor(descriptor)
    return labelsTensor
def update(self, _prediction, _groundTruth):
    """Accumulate classification counters for one batch of predictions.

    For every row of ``_prediction`` the indices returned by
    ``findTopIndices`` are compared with the ground-truth class: the total
    counter always grows, the top-5 counter grows when the class is among
    the returned indices, and the top-1 counter grows when it equals the
    first returned index.

    Args:
        _prediction: Two-dimensional prediction tensor.
        _groundTruth: Tensor holding the ground-truth classes.

    Raises:
        RuntimeError: If either tensor is falsy, if the prediction tensor is
            not two-dimensional, or if it has fewer columns than
            ``ClassificationErrorCounter.MAX_ERROR_RATE_CLASSES``.
    """
    if not _prediction:
        raise RuntimeError("Prediction tensor should not be null")
    if not _groundTruth:
        raise RuntimeError("GroundTruth tensor should not be null")

    shape = _prediction.getDimensions()
    if len(shape) != 2:
        raise RuntimeError(
            "Predictions tensor should have exactly two dimensions")

    nObjects, nClasses = shape[0], shape[1]
    topK = ClassificationErrorCounter.MAX_ERROR_RATE_CLASSES
    if nClasses < topK:
        raise RuntimeError(
            "Number of classes in prediction result is not enough to compute error rate"
        )

    scoresBlock = SubtensorDescriptor(ntype=np.float32)
    _prediction.getSubtensor([], 0, shape[0], readOnly, scoresBlock)
    scoreRows = scoresBlock.getArray()

    truthBlock = SubtensorDescriptor(ntype=np.intc)
    _groundTruth.getSubtensor([], 0, shape[0], readOnly, truthBlock)
    truthArray = truthBlock.getArray()

    for idx in range(nObjects):
        ranked = self.findTopIndices(scoreRows[idx], nClasses, topK)
        truth = truthArray[0][idx]

        self._totalObjects += 1
        if truth in ranked:
            self._top5ClassifiedObjects += 1
        if truth == ranked[0]:
            self._top1ClassifiedObjects += 1

    _prediction.releaseSubtensor(scoresBlock)
    _groundTruth.releaseSubtensor(truthBlock)
def printPredictedClasses(_predictionResult, _testingGroundTruth):
    """Print every prediction row with its arg-max class and ground truth.

    Each output line lists the row values with four decimals, followed by
    ``-> <arg-max index> | <ground-truth value>``.

    Args:
        _predictionResult: Object whose ``getResult(prediction.prediction)``
            returns the prediction tensor.
        _testingGroundTruth: Tensor holding the ground-truth classes.
    """
    scoresTensor = _predictionResult.getResult(prediction.prediction)
    shape = scoresTensor.getDimensions()

    scoresBlock = SubtensorDescriptor()
    scoresTensor.getSubtensor([], 0, shape[0], readOnly, scoresBlock)
    scores = scoresBlock.getArray()

    labelsBlock = SubtensorDescriptor(ntype=np.intc)
    _testingGroundTruth.getSubtensor([], 0, shape[0], readOnly, labelsBlock)
    labels = labelsBlock.getArray().flatten()

    # Print predicted classes
    winners = np.argmax(scores, axis=1)
    for row in range(shape[0]):
        for probability in scores[row]:
            print("{:.4f} ".format(probability), end="")
        print(" -> {} | {}".format(winners[row], labels[row]))

    scoresTensor.releaseSubtensor(scoresBlock)
    _testingGroundTruth.releaseSubtensor(labelsBlock)
def checkResult(predictionResult, testingGroundTruth, TestDataCount):
    """Tell whether more than 90% of the predictions match the ground truth.

    Args:
        predictionResult: Object whose ``getResult(prediction.prediction)``
            returns the prediction tensor.
        testingGroundTruth: Tensor holding the ground-truth classes.
        TestDataCount: Divisor used to turn the number of matches into a ratio.

    Returns:
        bool: ``True`` when ``matches / TestDataCount`` is greater than 0.9.
    """
    predicted = predictionResult.getResult(prediction.prediction)
    sampleCount = predicted.getDimensions()[0]

    scoresBlock = SubtensorDescriptor()
    predicted.getSubtensor([], 0, sampleCount, readOnly, scoresBlock)
    scores = scoresBlock.getArray()

    labelsBlock = SubtensorDescriptor(ntype=np.intc)
    testingGroundTruth.getSubtensor([], 0, sampleCount, readOnly, labelsBlock)
    labels = labelsBlock.getArray().flatten()

    # validation accuracy finding
    winners = np.argmax(scores, axis=1)
    hits = np.sum(winners == labels)

    predicted.releaseSubtensor(scoresBlock)
    testingGroundTruth.releaseSubtensor(labelsBlock)

    return bool(hits / TestDataCount > 0.9)
class DAALNet:
    """Wrapper class for working with :obj:`daal.algorithms.neural_networks` package.

    Notes:
        Default working regime is training, see
        :obj:`daal.algorithms.neural_networks.training.Batch()`.
        Default solver used for training is SGD, see
        :obj:`daal.algorithms.optimization_solver.sgd.Batch()`.
    """

    _daal_net_namespace = dict()

    def __init__(self):
        #TODO: set do_rebuild=False once memory allocation is fixed on prediction in Intel DAAL 2018
        self.do_rebuild = True
        self.initializer = None
        self.solver = sgd.Batch()
        self.net = training.Batch(self.solver)

    def with_solver(self, solver):
        """Provides a specific solver for the Intel DAAL net/graph.

        Args:
            solver (from :obj:`daal.algorithms.optimization_solver` module): Intel DAAL solver.

        Returns:
            :py:class:`pydaalcontrib.nn.DAALNet`: Intel DAAL network with the provided solver.
        """
        self.solver = solver
        self.net = training.Batch(self.solver)

        return self

    def with_initializer(self, initializer):
        """Stores the initializer that is later handed to ``build_topology``.

        Args:
            initializer: Object passed as ``initializer`` when the topology is built.

        Returns:
            :py:class:`pydaalcontrib.nn.DAALNet`: This network, to allow call chaining.
        """
        self.initializer = initializer

        return self

    def train(self, data, labels, **kw_args):
        """Trains a specific Intel DAAL net/graph based on the provided data and labels.

        Args:
            data (:obj:`daal.data_management.Tensor` or :obj:`numpy.ndarray`): Training data.
            labels (:obj:`daal.data_management.Tensor` or :obj:`numpy.ndarray`): Training labels.
            **kwargs: Arbitrary keyword arguments (``batch_size`` and ``learning_rate``).

        Returns:
            :py:class:`pydaalcontrib.nn.DAALNet`: Trained DAAL network.

        Raises:
            ValueError: If the provided ``data`` or ``labels`` are of the wrong type
                or the topology is not set.
        """
        if 'topology' not in self.__dict__:
            raise ValueError('Topology is not intialized!')

        # NOTE(review): no attribute named 'result' is assigned anywhere in this
        # class ('train_result' is), so this guard is always true -- confirm intent.
        if 'batch_size' in kw_args and 'result' not in self.__dict__:
            self.solver.parameter.batchSize = kw_args['batch_size']

        # Solvers expose either ``learningRate`` or ``learningRateSequence``;
        # set whichever the current solver parameter provides.
        if 'learning_rate' in kw_args and 'learningRate' in self.solver.parameter.__swig_getmethods__:
            self.solver.parameter.learningRate = get_learning_rate(
                kw_args['learning_rate'])
        if 'learning_rate' in kw_args and 'learningRateSequence' in self.solver.parameter.__swig_getmethods__:
            self.solver.parameter.learningRateSequence = get_learning_rate(
                kw_args['learning_rate'])

        if isinstance(data, Tensor):
            self.data = data
        elif isinstance(data, np.ndarray) and data.base is not None:
            # The array is a view onto another object: hand over its own copy.
            self.data = HomogenTensor(data.copy(), ntype=data.dtype)
        elif isinstance(data, np.ndarray) and data.base is None:
            self.data = HomogenTensor(data, ntype=data.dtype)
        else:
            raise ValueError('Data is not of numpy.ndarray or Tensor type!')

        if isinstance(labels, Tensor):
            self.labels = labels
        elif isinstance(labels, np.ndarray):
            if len(labels.shape) == 1:
                labels = labels.reshape([-1, 1])
            # ``np.int`` / ``np.float`` no longer exist in current NumPy; the
            # abstract ``np.integer`` / ``np.floating`` classes express the same
            # "any integer" / "any float" checks on every NumPy version.
            if issubdtype(labels.dtype, np.integer):
                labels = labels.astype(np.intc)
            elif not issubdtype(labels.dtype, np.floating):
                labels = labels.astype(np.float64)

            self.labels = HomogenTensor(labels.copy(), ntype=labels.dtype)
        else:
            raise ValueError('Labels are not of numpy.ndarray or Tensor type!')

        if 'train_result' not in self.__dict__ or self.train_result is None:
            # First training run: initialize the net with the sample shape
            # prefixed by the solver batch size.
            dims = self.data.getDimensions()[1:]
            dims.insert(0, self.solver.parameter.batchSize)
            self.net.initialize(dims, self.topology)

        # heuristically define the number of iterations for ``self.solver``
        batch_size = float(self.solver.parameter.batchSize)
        n_iter = np.ceil(self.data.getDimensionSize(0) / batch_size)
        self.solver.parameter.nIterations = int(n_iter)

        # Pass a solver, training data and labels to the algorithm
        self.net.parameter.optimizationSolver = self.solver
        self.net.input.setInput(training.data, self.data)
        self.net.input.setInput(training.groundTruth, self.labels)

        # Do an actual compute and store the result
        self.train_result = self.net.compute()
        self.do_rebuild = False

        return self

    #TODO: refactor set rebuild=False once memory allocation is fixed on prediction in Intel DAAL 2018
    def predict(self, data, batch_size=None, rebuild=True):
        """Predicts labels based on a prediction model.

        Supported notation is ``with net.predict(...) as predictions:``

        Args:
            data (:obj:`daal.data_management.Tensor` or :obj:`numpy.ndarray`): Prediction data.
            batch_size (:obj:`int`): Batch size for processing prediction data.
                Falsy values and values larger than the first dimension of the
                data fall back to the size of that first dimension.
            rebuild (:obj:`bool`): Control parameter to force rebuild of the model.

        Returns:
            :py:class:`pydaalcontrib.nn.DAALNet`: DAAL network with the evaluated predictions.

        Raises:
            ValueError: If the provided ``data`` are of the wrong type.
        """
        if isinstance(data, np.ndarray):
            _data = HomogenTensor(data.copy(), ntype=data.dtype)
            read_type = data.dtype
        elif isinstance(data, Tensor):
            # Previously this branch left ``_data`` unbound, so a Tensor input
            # crashed on the very next statement.
            _data = data
            # NOTE(review): no numpy dtype is read from a Tensor here; float32 is
            # assumed because the prediction model below is the Float32 one -- confirm.
            read_type = np.float32
        else:
            raise ValueError('Data is not of numpy.ndarray or Tensor type!')

        if not batch_size or batch_size > _data.getDimensionSize(0):
            batch_size = _data.getDimensionSize(0)

        if rebuild and self.do_rebuild:
            #TODO: refactor set rebuild=False once memory allocation is fixed on prediction in Intel DAAL 2018
            parameter = prediction.Parameter()
            parameter.batchSize = batch_size
            self.do_rebuild = False
            rebuild_args = {
                'data_dims': [batch_size] + _data.getDimensions()[1:],
                'parameter': parameter
            }
            self.model = self.build_model(self.descriptor, False,
                                          rebuild=rebuild_args, **self.build_args)
        elif 'train_result' in self.__dict__:
            self.model = self.train_result.get(
                training.model).getPredictionModel_Float32()

        net = prediction.Batch()
        net.parameter.batchSize = batch_size
        net.input.setModelInput(prediction.model, self.model)
        net.input.setTensorInput(prediction.data, _data)

        # The block stays acquired after returning; it is released by
        # ``__exit__`` or ``get_predictions``.
        self.predictions = SubtensorDescriptor(ntype=read_type)
        self.predict_result = net.compute().getResult(prediction.prediction)
        self.predict_result.getSubtensor(
            [], 0, self.predict_result.getDimensionSize(0), readOnly, self.predictions)

        return self

    def get_predictions(self):
        """Gets the latest predictions after :py:meth:`predict` was called.

        Returns:
            :py:obj:`numpy.ndarray`: Evaluated predictions, or ``None`` if
            :py:meth:`predict` has not been called yet.
        """
        if 'predictions' in self.__dict__:
            predictions_numpy = self.predictions.getArray()
            # NOTE(review): the array is returned after its block is released;
            # verify that it stays valid, or copy it before releasing.
            self.predict_result.releaseSubtensor(self.predictions)
            return predictions_numpy
        else:
            return None

    def __enter__(self):
        """Returns the array of the block acquired by :py:meth:`predict`."""
        return self.predictions.getArray()

    def __exit__(self, type, value, traceback):
        """Releases the block acquired by :py:meth:`predict`."""
        self.predict_result.releaseSubtensor(self.predictions)

    def allocate_model(self, model, args):
        """Allocates a contiguous memory for the model if 'rebuild' option is specified.

        Args:
            model (:obj:`daal.algorithms.neural_networks.prediction.Model`): instantiated model.
            args (:obj:`dict`): Different args which are passed from :py:func:`build_model`.

        Returns:
            :obj:`daal.algorithms.neural_networks.prediction.Model`
        """
        if 'rebuild' in args:
            parameter = args['rebuild']['parameter']
            data_dims = args['rebuild']['data_dims']
            model.allocate_Float32(data_dims, parameter)

        return model

    def build_model(self, model, trainable, **kw_args):
        """(re)Builds a specific Intel DAAL model based on the provided descriptor.

        Args:
            model (:py:class:`pydaalcontrib.model.ModelBase` or :obj:`str`): Instance of a model
                or a path to the folder/file containing the model (*pydaal.model*) file.
            trainable (:obj:`bool`): Flag indicating whether `training` or `prediction`
                topology to be built.
            kw_args (:obj:`dict`): Different keyword args which might be of use in sub-classes.

        Returns:
            :obj:`daal.algorithms.neural_networks.prediction.Model` or ``None``
        """
        # A model attribute already exists: only (re)allocate it if requested.
        if 'model' in self.__dict__:
            return self.allocate_model(self.model, kw_args)

        if isinstance(model, basestring):
            self.descriptor = load_model(model)
        else:
            self.descriptor = model

        self.topology = build_topology(self.descriptor, trainable,
                                       initializer=self.initializer)

        #TODO: replace with training.Model(topology) once fixed
        return None if trainable else self.allocate_model(
            prediction.Model(self.topology), kw_args)

    @dispatch(basestring, namespace=_daal_net_namespace)
    def build(self, model_path, trainable=False, **kw_args):
        self.model = self.build_model(model_path, trainable, **kw_args)
        # Remember the arguments so that ``predict`` can rebuild the model.
        self.build_args = {'model_path': model_path}
        self.build_args.update(kw_args)

        return self

    @dispatch(Model, namespace=_daal_net_namespace)
    def build(self, model, trainable=True, **kw_args):
        self.model = self.build_model(model, trainable, **kw_args)
        # Remember the arguments so that ``predict`` can rebuild the model.
        self.build_args = kw_args

        return self