def backwardParamsRnn(data, outdata, w, trainReserve, descRnn, inithidden=None):
    """Compute the RNN weight gradient via miopenRNNBackwardWeights.

    data is (seqlen, batch, insize) float32, outdata is the forward-pass output,
    w is the flat float32 weight blob and trainReserve the (workspace, reserveSpace)
    pair returned by the training forward pass. Returns a freshly allocated flat
    float32 gradient buffer with the same shape as w.
    """
    assert data.ndim == 3 and data.dtype == np.float32 and descRnn.insize == data.shape[2]
    assert outdata.ndim == 3 and outdata.dtype == data.dtype
    assert w.ndim == 1 and w.dtype == np.float32

    seqlen, batchsize, _ = data.shape

    # Bidirectional RNNs double both the per-step output width and the number
    # of hidden-state "layers"; per-layer strides are the same either way.
    if descRnn.dir == DirectionMode.uni:
        assert outdata.shape[2] == descRnn.hsize
        nlayers = descRnn.layers
    else:
        assert outdata.shape[2] == 2 * descRnn.hsize
        nlayers = 2 * descRnn.layers

    dims = (nlayers, batchsize, descRnn.hsize)
    strides = (batchsize * descRnn.hsize, descRnn.hsize, 1)

    if inithidden is not None:
        assert inithidden.dtype == np.float32 and inithidden.shape == dims
    else:
        inithidden = Driver.zeros(queue, dims, dtype=np.float32, allocator=memPool)

    descHx = createDescribedNdTensor(dims, strides, inithidden)

    # One shape descriptor per timestep; every step shares the same geometry,
    # so the first slice serves as the template for all of them.
    descDatas = [createDescribedNdTensor(None, None, data[0]) for _ in range(data.shape[0])]
    descOutDatas = [createDescribedNdTensor(None, None, outdata[0]) for _ in range(data.shape[0])]

    indescs = [d.desc for d in descDatas]
    outdescs = [d.desc for d in descOutDatas]

    dw = Driver.zeros(queue, w.shape, dtype=np.float32, allocator=memPool)
    descDw = createDescribedNdTensor(None, None, dw)

    workspace, reserveSpace = trainReserve

    libmiopen.miopenRNNBackwardWeights(context, descRnn.desc, seqlen, indescs, data.int_ptr,
                                       descHx.desc, descHx.ptr, outdescs, outdata.int_ptr,
                                       descDw.desc, descDw.ptr, workspace.int_ptr, workspace.nbytes,
                                       reserveSpace.int_ptr, reserveSpace.nbytes)

    destroyDescribedTensors(*descDatas, *descOutDatas, descHx, descDw)
    return dw
def setRnnParam(descRnn, layer, descData, descW, linLayer, linLayerMat, linLayerBias):
    """Upload one linear layer's weight matrix and bias into the packed RNN weight blob.

    linLayer selects which gate/matrix within the layer is written; descW/descW.ptr
    address the packed parameter buffer that MIOpen consumes.
    """
    descMat = createDescribedNdTensor(None, None, linLayerMat)
    libmiopen.miopenSetRNNLayerParam(context, descRnn.desc, layer, descData.desc,
                                     descW.desc, descW.ptr, linLayer,
                                     descMat.desc, descMat.ptr)

    descBias = createDescribedNdTensor(None, None, linLayerBias)
    libmiopen.miopenSetRNNLayerBias(context, descRnn.desc, layer, descData.desc,
                                    descW.desc, descW.ptr, linLayer,
                                    descBias.desc, descBias.ptr)

    destroyDescribedTensors(descMat, descBias)
def updateRnnParams(descRnn, w, params):
    """Write params into the packed weight buffer w, dispatching on the RNN cell type.

    Raises NotImplementedError for cell modes other than relu/tanh/lstm/gru.
    """
    descData = createDescribedNdTensor(dims=(1, descRnn.insize),
                                       strides=(descRnn.insize, 1), tensor=None)
    descW = createDescribedNdTensor(None, None, w)

    mode = descRnn.mode
    if mode in (RNNMode.relu, RNNMode.tanh):
        updateNativeRnnParams(descRnn, descData, descW, params)
    elif mode == RNNMode.lstm:
        updateLSTMParams(descRnn, descData, descW, params)
    elif mode == RNNMode.gru:
        updateGRUParams(descRnn, descData, descW, params)
    else:
        raise NotImplementedError()

    destroyDescribedTensors(descData, descW)
def createRnnParams(descRnn, insize, dataType=DataType.float, w=None):
    """Allocate (or validate) the packed float32 weight buffer for an RNN.

    Queries MIOpen for the required parameter size. If w is None a new buffer
    is allocated; otherwise w's byte size must match exactly.

    Raises NotImplementedError for non-float dataType and RuntimeError when a
    supplied w has the wrong size. Returns the (possibly new) weight buffer.

    Fixes: the original leaked the descData tensor descriptor on both error
    paths (NotImplementedError was raised after creating it, and the size-check
    RuntimeError skipped destroyDescribedTensors). The type check now runs
    first, and cleanup happens in a finally block.
    """
    if dataType != DataType.float:
        raise NotImplementedError()

    descData = createDescribedNdTensor(dims=(1, insize), strides=(insize, 1), tensor=None)
    try:
        wsize = libmiopen.miopenGetRNNParamsSize(context, descRnn.desc, descData.desc,
                                                 dataType.value)
        nparams = wsize // np.float32(0).itemsize

        if w is None:
            w = Driver.empty(queue, (nparams, ), dtype=np.float32, allocator=memPool)
        elif w.nbytes != wsize:
            raise RuntimeError("Bad weights buffer size (got %s, expected %s)" %
                               (w.nbytes, wsize))
    finally:
        # Release the shape descriptor even when validation raises.
        destroyDescribedTensors(descData)

    return w
def backwardDataRnn(grad, outdata, w, trainReserve, descRnn, inithidden=None, initcells=None):
    """Backpropagate the RNN data gradient via miopenRNNBackwardData.

    grad is the gradient w.r.t. outdata, both (seqlen, batch, outwidth) float32;
    trainReserve is the (workspace, reserveSpace) pair from the training forward
    pass. Returns (ingrad, trainReserve) and additionally appends the gradient
    w.r.t. the initial hidden state (and, for LSTM, the initial cell state) only
    when the caller supplied those states.

    Fixes over the original: `x is not None` replaces the `True if ... else
    False` ternaries, the per-direction branch no longer duplicates the
    dims/strides computation, and the descriptor-building loop (whose index was
    unused) is expressed as comprehensions. The MIOpen call is unchanged.
    """
    assert grad.ndim == 3 and grad.dtype == np.float32
    assert outdata.shape == grad.shape and outdata.dtype == grad.dtype
    assert w.ndim == 1 and w.dtype == np.float32

    seqlen, batchsize, _ = grad.shape

    # Remember whether the caller provided the initial states: that decides
    # which extra gradients are appended to the returned tuple.
    useHidden = inithidden is not None
    useCells = initcells is not None

    if descRnn.dir == DirectionMode.uni:
        assert grad.shape[-1] == descRnn.hsize
        nlayers = descRnn.layers
    else:
        assert grad.shape[-1] == 2 * descRnn.hsize
        nlayers = 2 * descRnn.layers

    dims = (nlayers, batchsize, descRnn.hsize)
    strides = (batchsize * descRnn.hsize, descRnn.hsize, 1)

    if inithidden is not None:
        assert inithidden.dtype == np.float32 and inithidden.shape == dims
    else:
        inithidden = Driver.zeros(queue, dims, dtype=np.float32, allocator=memPool)

    # Cell state is only materialized for LSTM; for other modes initcells stays
    # None and descCx carries a null tensor, matching the original behavior.
    if descRnn.mode == RNNMode.lstm:
        if initcells is not None:
            assert initcells.dtype == np.float32 and initcells.shape == dims
        else:
            initcells = Driver.zeros(queue, dims, dtype=np.float32, allocator=memPool)

    descHx = createDescribedNdTensor(dims, strides, inithidden)
    descCx = createDescribedNdTensor(dims, strides, initcells)

    descDHx = createDescribedNdTensor(
        None, None, Driver.empty(queue, dims, dtype=np.float32, allocator=memPool))
    descDCx = createDescribedNdTensor(
        None, None, Driver.empty(queue, dims, dtype=np.float32, allocator=memPool))
    # Gradients w.r.t. the final hidden/cell states are taken to be zero.
    descDHy = createDescribedNdTensor(
        None, None, Driver.zeros(queue, dims, dtype=np.float32, allocator=memPool))
    descDCy = createDescribedNdTensor(
        None, None, Driver.zeros(queue, dims, dtype=np.float32, allocator=memPool))

    ingrad = Driver.zeros(queue, outdata.shape[:2] + (descRnn.insize, ),
                          dtype=np.float32, allocator=memPool)

    # All timesteps share a geometry, so one representative slice describes each.
    descInGrads = [createDescribedNdTensor(None, None, ingrad[0]) for _ in range(seqlen)]
    descGrads = [createDescribedNdTensor(None, None, grad[0]) for _ in range(seqlen)]
    descOutDatas = [createDescribedNdTensor(None, None, outdata[0]) for _ in range(seqlen)]

    ingraddescs = [d.desc for d in descInGrads]
    graddescs = [d.desc for d in descGrads]
    outdatadescs = [d.desc for d in descOutDatas]

    descW = createDescribedNdTensor(None, None, w)
    workspace, reserveSpace = trainReserve

    libmiopen.miopenRNNBackwardData(
        context, descRnn.desc, seqlen, outdatadescs, outdata.int_ptr, graddescs,
        grad.int_ptr, descDHy.desc, descDHy.ptr, descDCy.desc, descDCy.ptr,
        descW.desc, descW.ptr, descHx.desc, descHx.ptr, descCx.desc, descCx.ptr,
        ingraddescs, ingrad.int_ptr, descDHx.desc, descDHx.ptr, descDCx.desc,
        descDCx.ptr, workspace.int_ptr, workspace.nbytes, reserveSpace.int_ptr,
        reserveSpace.nbytes)

    # Only the descriptors are destroyed; the underlying tensors (returned
    # below via .tensor) stay alive.
    destroyDescribedTensors(*descInGrads, *descGrads, *descOutDatas, descHx, descCx,
                            descDHx, descDCx, descW)

    tup = (ingrad, trainReserve)
    if useHidden:
        tup = tup + (descDHx.tensor, )
    if useCells:
        tup = tup + (descDCx.tensor, )

    return tup
def forwardRnn(data, w, descRnn, inithidden=None, initcells=None, test=False):
    """Run the RNN forward pass.

    data is (seqlen, batch, insize) float32 and w the flat float32 weight blob.
    With test=True runs inference and returns outdata only; otherwise runs the
    training pass and returns (outdata, (workspace, reserveSpace)) so that the
    reserve can be fed back into the backward passes.
    """
    assert data.ndim == 3 and data.dtype == np.float32 and descRnn.insize == data.shape[2]
    assert w.ndim == 1 and w.dtype == np.float32

    seqlen, batchsize, _ = data.shape

    # Bidirectional RNNs double the output width and the hidden "layer" count.
    if descRnn.dir == DirectionMode.uni:
        hsize = descRnn.hsize
        dims = (descRnn.layers, batchsize, hsize)
        strides = (batchsize * hsize, hsize, 1)
    else:
        hsize = 2 * descRnn.hsize
        dims = (2 * descRnn.layers, batchsize, descRnn.hsize)
        strides = (batchsize * descRnn.hsize, descRnn.hsize, 1)

    if inithidden is not None:
        assert inithidden.dtype == np.float32 and inithidden.shape == dims
    else:
        inithidden = Driver.zeros(queue, dims, dtype=np.float32, allocator=memPool)

    # Cell state only exists for LSTM; otherwise descCx wraps a null tensor.
    if descRnn.mode == RNNMode.lstm:
        if initcells is not None:
            assert initcells.dtype == np.float32 and initcells.shape == dims
        else:
            initcells = Driver.zeros(queue, dims, dtype=np.float32, allocator=memPool)

    descHx = createDescribedNdTensor(dims, strides, inithidden)
    descCx = createDescribedNdTensor(dims, strides, initcells)
    descHy = createDescribedNdTensor(
        None, None, Driver.empty(queue, dims, dtype=np.float32, allocator=memPool))
    descCy = createDescribedNdTensor(
        None, None, Driver.empty(queue, dims, dtype=np.float32, allocator=memPool))

    outdata = Driver.empty(queue, data.shape[:2] + (hsize, ),
                           dtype=np.float32, allocator=memPool)

    # Per-timestep descriptors; every step shares the first slice's geometry.
    descDatas = [createDescribedNdTensor(None, None, data[0]) for _ in range(data.shape[0])]
    descOutDatas = [createDescribedNdTensor(None, None, outdata[0]) for _ in range(data.shape[0])]

    indescs = [d.desc for d in descDatas]
    outdescs = [d.desc for d in descOutDatas]

    descW = createDescribedNdTensor(None, None, w)

    reserveSize = libmiopen.miopenGetRNNTrainingReserveSize(
        context, descRnn.desc, seqlen, indescs)
    reserveSpace = Driver.zeros(queue, (reserveSize // np.float32(0).itemsize, ),
                                dtype=np.float32, allocator=memPool)

    workspaceSize = libmiopen.miopenGetRNNWorkspaceSize(
        context, descRnn.desc, seqlen, indescs)
    workspace = Driver.empty(queue, (workspaceSize, ), dtype=np.uint8, allocator=memPool)

    tup = outdata
    if not test:
        # Training: hand the workspace/reserve pair back for the backward passes.
        tup = (outdata, (workspace, reserveSpace))
        libmiopen.miopenRNNForwardTraining(
            context, descRnn.desc, seqlen, indescs, data.int_ptr, descHx.desc,
            descHx.ptr, descCx.desc, descCx.ptr, descW.desc, descW.ptr, outdescs,
            outdata.int_ptr, descHy.desc, descHy.ptr, descCy.desc, descCy.ptr,
            workspace.int_ptr, workspaceSize, reserveSpace.int_ptr, reserveSize)
    else:
        libmiopen.miopenRNNForwardInference(
            context, descRnn.desc, seqlen, indescs, data.int_ptr, descHx.desc,
            descHx.ptr, descCx.desc, descCx.ptr, descW.desc, descW.ptr, outdescs,
            outdata.int_ptr, descHy.desc, descHy.ptr, descCy.desc, descCy.ptr,
            workspace.int_ptr, workspaceSize)

    destroyDescribedTensors(*descDatas, *descOutDatas, descHx, descCx, descHy,
                            descCy, descW)
    return tup