Example #1
def batchNorm2dTest():
    batchsize, maps, h, w = 4, 5, 3, 2

    data = CPUArray.toDevice(
        np.random.randn(batchsize, maps, h, w).astype(np.float32))
    hostData = data.get()

    scale = CPUArray.toDevice(
        np.random.randn(1, maps, 1, 1).astype(np.float32))
    bias = CPUArray.toDevice(np.random.randn(1, maps, 1, 1).astype(np.float32))
    mean = CPUArray.toDevice(np.random.randn(1, maps, 1, 1).astype(np.float32))
    var = CPUArray.toDevice(
        (np.ones((1, maps, 1, 1)).astype(np.float32) +
         np.random.randn(1, maps, 1, 1).astype(np.float32))**2)

    outdata = batchNorm2d(data, scale, bias, mean, var, test=True)

    hostScale, hostBias = scale.get(), bias.get()
    hostMean, hostVar = mean.get(), var.get()
    hostNormData = np.empty(hostData.shape, dtype=np.float32)
    hostOutData = np.empty(hostData.shape, dtype=np.float32)

    for c in range(maps):
        hostNormData[:, c, :, :] = (hostData[:, c, :, :] - hostMean[0, c, 0, 0]) / np.sqrt(hostVar[0, c, 0, 0] + 1e-5)
        hostOutData[:, c, :, :] = hostNormData[:, c, :, :] * hostScale[0, c, 0, 0] + hostBias[0, c, 0, 0]

    assert np.allclose(hostOutData, outdata.get())
Example #2
def crossEntropy(scores, labels, weights=None, error=None):
    assert scores.dtype == np.float32 and labels.dtype == np.int32

    shape = scores.shape
    if scores.ndim < 4:
        scores = scores.reshape(*shape, *(1 for _ in range(4 - scores.ndim)))

    softmax = softmaxNd(scores)

    grad = CPUArray.empty(shape, dtype=np.float32)
    if error is None:
        error = CPUArray.empty((), dtype=np.float32)

    error.fill(0.0)

    spatialDim = int(np.prod(scores.shape[2:]))
    mapStride = spatialDim * scores.shape[1]

    if weights is None:
        ceMod.cost(softmax.data, labels.data, mapStride, spatialDim,
                   scores.shape[1], scores.shape[0], error.data, grad.data,
                   softmax.size)

    else:
        wceMod.cost(softmax.data, labels.data, weights.data, mapStride,
                    spatialDim, shape[1], shape[0], error.data, grad.data,
                    softmax.size)

    return error, grad
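A minimal usage sketch for the weighted branch, in the same style as the other tests here; the function name below is illustrative only, np/CPUArray are assumed imported as in the surrounding examples, and the per-class float32 layout of weights is an assumption (the wceMod.cost kernel itself is not shown in this snippet):

def crossEntropyWeightedSketch():
    batchsize, numclasses = 16, 10

    scores = CPUArray.toDevice(np.random.randn(batchsize, numclasses).astype(np.float32))
    labels = CPUArray.toDevice(np.random.randint(0, numclasses, size=(batchsize, ), dtype=np.int32))

    # assumed layout: one weight per class, float32
    weights = CPUArray.toDevice(np.random.uniform(0.5, 1.5, size=(numclasses, )).astype(np.float32))

    error, grad = crossEntropy(scores, labels, weights=weights)
    assert grad.shape == scores.shape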
Example #3
def svmTest():
    batchsize, size = 20, 4

    scores = CPUArray.toDevice(
        np.random.randn(batchsize, size).astype(np.float32))
    labels = CPUArray.toDevice(
        np.random.randint(low=0, high=size, size=(batchsize, ),
                          dtype=np.int32))

    error, grad = svm(scores, labels, mode="l1")

    hostScores, hostLabels = scores.get(), labels.get()

    hostGrad = np.empty(grad.shape, dtype=np.float32)
    hostError = 0.0

    for b in range(batchsize):
        for n in range(size):
            cls = 2 * (hostLabels[b] == n) - 1
            val = hostScores[b, n] * cls

            hostGrad[b, n] = cls / batchsize / size if val < 1 else 0.0
            hostError += max(0.0, 1.0 - val) / batchsize / size

    assert np.allclose(hostGrad, grad.get())
    assert np.isclose(hostError, error.get() / scores.shape[0])
Example #4
def conv2dTest():
    batchsize, inmaps, h, w = 1, 2, 6, 6
    fsize, outmaps = 2, 4

    data = CPUArray.toDevice(
        np.random.randn(batchsize, inmaps, h, w).astype(np.float32))

    W = CPUArray.toDevice(
        np.random.randn(outmaps, inmaps, fsize, fsize).astype(np.float32))
    bias = CPUArray.toDevice(
        np.random.randn(1, outmaps, 1, 1).astype(np.float32))

    outdata = conv2d(data, W, bias)

    hostData, hostW, hostBias = data.get(), W.get(), bias.get()
    hostOutData = np.empty(outdata.shape, dtype=np.float32)

    for c in range(outmaps):
        hostOutData[:, c, :, :] = hostBias[0, c, 0, 0]

    for b in range(batchsize):
        for oc in range(outmaps):
            for ic in range(inmaps):
                for y in range(outdata.shape[2]):
                    for x in range(outdata.shape[3]):
                        for dy in range(fsize):
                            for dx in range(fsize):
                                hostOutData[b, oc, y, x] += hostData[b, ic, y + dy, x + dx] * hostW[oc, ic, dy, dx]

    assert np.allclose(hostOutData, outdata.get())
Example #5
def unittest():
    batchsize, maps, h, w = 3, 4, 5, 5
    epsilon = 1e-5

    data = CPUArray.toDevice(
        np.random.randn(batchsize, maps, h, w).astype(np.float32))
    scale = CPUArray.toDevice(
        np.random.randn(1, maps, 1, 1).astype(np.float32))
    bias = CPUArray.toDevice(np.random.randn(1, maps, 1, 1).astype(np.float32))

    outdata, savemean, savevar, extscale, extbias, desc = instanceNorm2d(
        data, scale, bias, epsilon)

    hostData = data.get().reshape(data.shape[0] * data.shape[1], -1)
    hostScale, hostBias = scale.get().reshape(maps, 1), bias.get().reshape(maps, 1)
    hostExtScale, hostExtBias = np.tile(hostScale, (batchsize, 1)), np.tile(hostBias, (batchsize, 1))

    hostMean = np.mean(hostData, axis=1, keepdims=True)
    hostVar = np.var(hostData, axis=1)
    hostInvVar = 1.0 / np.sqrt(hostVar + epsilon)
    hostOutData = (hostData - hostMean) * hostInvVar[:, np.newaxis]
    hostOutScData = hostOutData * hostExtScale + hostExtBias

    assert np.allclose(hostOutScData.reshape(data.shape), outdata.get())
    assert np.allclose(hostMean.reshape(savemean.shape), savemean.get())
    assert np.allclose(hostVar.reshape(savevar.shape), savevar.get())

    grad = CPUArray.toDevice(
        np.random.randn(batchsize, maps, h, w).astype(np.float32))
    ingrad, scalegrad, bgrad = instanceNorm2dBackward(grad, data, extscale,
                                                      extbias, savemean,
                                                      savevar, epsilon, desc)

    hostGrad = grad.get().reshape(grad.shape[0] * grad.shape[1], -1)
    hostScGrad = hostGrad * hostExtScale
    hostCorrs = np.empty(hostInvVar.shape, dtype=np.float32)
    for i in range(hostCorrs.shape[0]):
        hostCorrs[i] = np.dot(hostScGrad[i], hostOutData[i]) / hostScGrad.shape[1]
    hostInGrad = hostScGrad - np.mean(hostScGrad, axis=1, keepdims=True) - hostCorrs[:, np.newaxis] * hostOutData
    hostInGrad *= hostInvVar[:, np.newaxis]

    hostScaleGrad = np.sum(np.sum(hostOutData * hostGrad, axis=1).reshape(batchsize, -1), axis=0)
    hostBiasGrad = np.sum(np.sum(hostGrad, axis=1).reshape(batchsize, -1), axis=0)

    assert np.allclose(hostInGrad.reshape(grad.shape), ingrad.get())
    assert np.allclose(hostScaleGrad.reshape((1, maps, 1, 1)), scalegrad.get())
    assert np.allclose(hostBiasGrad.reshape((1, maps, 1, 1)), bgrad.get())
Example #6
def unittest():
    A = CPUArray.toDevice(np.random.randn(5, 3).astype(np.float32))
    B = CPUArray.toDevice(np.random.randn(3, 4).astype(np.float32))

    C = mulMatrixOnMatrix(A, B)
    assert np.allclose(np.dot(A.get(), B.get()), C.get())

    F = mulMatrixOnMatrix(B, C, transpB=True)
    assert np.allclose(np.dot(B.get(), C.get().T), F.get())

    G = mulMatrixOnMatrix(F, B, transpA=True)
    assert np.allclose(np.dot(F.get().T, B.get()), G.get())
Example #7
def eltwiseTest():
    outdata = CPUArray.empty((10, ), dtype=np.float32)
    indata = CPUArray.toDevice(np.random.randn(10).astype(np.float32))

    square = ElementwiseKernel([(float_t.ptr, "outdata"),
                                (float_t.const.ptr, "indata")],
                               "outdata[i] = indata[i] * indata[i]", "square")

    square(outdata, indata)

    hostInData = indata.get()
    hostOutData = hostInData * hostInData

    assert np.allclose(hostOutData, outdata.get())
Example #8
def mulMatrixOnMatrix(A,
                      B,
                      out=None,
                      transpA=False,
                      transpB=False,
                      alpha=1.0,
                      beta=0.0):
    assert not (transpA and transpB)
    assert A.ndim == 2 and B.ndim == 2

    assert alpha == 1.0 and beta == 0.0

    if transpA:
        assert A.shape[0] == B.shape[0]
        shape = (A.shape[1], B.shape[1])

    elif transpB:
        assert A.shape[1] == B.shape[1]
        shape = (A.shape[0], B.shape[0])

    else:
        assert A.shape[1] == B.shape[0]
        shape = (A.shape[0], B.shape[1])

    A = A.data.T if transpA else A.data
    B = B.data.T if transpB else B.data

    if out is None:
        out = CPUArray.empty(shape, dtype=np.float32)

    np.dot(A, B, out=out.data)
    return out
Example #9
def maxpool2dTest():
    batchsize, maps, h, w = 1, 1, 8, 8
    data = CPUArray.toDevice(
        np.random.randn(batchsize, maps, h, w).astype(np.float32))

    outdata = pool2d(data)

    def maxDownSample2d(dat, factor):
        trimrows = dat.shape[0] // factor * factor
        trimcols = dat.shape[1] // factor * factor

        maxSoFar = None
        first = True

        for coff in range(factor):
            for roff in range(factor):
                hopped = dat[roff:trimrows:factor, coff:trimcols:factor]
                if first:
                    maxSoFar = hopped
                    first = False
                else:
                    maxSoFar = np.maximum(maxSoFar, hopped)

        return maxSoFar

    hostOutData = maxDownSample2d(data.get()[0, 0], 2)
    assert np.allclose(hostOutData, outdata.get())
Example #10
	def wrapTile(ary, times, axis):
		shape = (times, )
		if axis > 0:
			shape = (1, ) * axis + shape
		if axis < ary.ndim - 1:
			shape = shape + (1, ) * (ary.ndim - 1 - axis)

		out = np.tile(ary.data, shape)
		return CPUArray(out.shape, out.dtype, data=out, acquire=True)
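A minimal usage sketch for wrapTile, assuming it is callable as a plain function (the leading indentation above suggests it lives inside a factory or class in the source) and that np/CPUArray are imported as in the other examples; the function name below is illustrative only:

def wrapTileSketch():
    ary = CPUArray.toDevice(np.random.randn(3, 4).astype(np.float32))

    out = wrapTile(ary, times=2, axis=0)  # tile twice along the first axis

    assert out.shape == (6, 4)
    assert np.allclose(np.tile(ary.get(), (2, 1)), out.get())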
Example #11
def addVectorToVector(x, y, out=None, alpha=1.0, beta=1.0):
    assert x.ndim == 1
    assert x.flags.forc and y.flags.forc
    assert x.shape == y.shape
    assert x.dtype == y.dtype and x.dtype == np.float32

    if out is None:
        out = CPUArray.empty(x.shape, dtype=np.float32)

    ElementWise.addVectorToVectorKer(out, x, y, alpha, beta)
    return out
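A minimal sketch of exercising addVectorToVector, assuming the underlying ElementWise.addVectorToVectorKer kernel computes out = alpha * x + beta * y (the kernel body is not shown here); the function name below is illustrative:

def addVectorToVectorSketch():
    x = CPUArray.toDevice(np.random.randn(10).astype(np.float32))
    y = CPUArray.toDevice(np.random.randn(10).astype(np.float32))

    out = addVectorToVector(x, y, alpha=2.0, beta=-1.0)

    # assumes out = alpha * x + beta * y
    assert np.allclose(2.0 * x.get() - y.get(), out.get())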
Example #12
File: Pad.py Project: rsarbaev/PuzzleLib
def reflectpad1d(data, pad):
	assert data.dtype == np.float32 and data.ndim == 3

	batchsize, maps, insize = data.shape
	lpad, rpad = pad

	assert insize >= max(lpad, rpad) + 1
	outdata = CPUArray.empty((batchsize, maps, insize + lpad + rpad), dtype=data.dtype)

	mod.reflectpad1d(outdata.data, data.data, batchsize, maps, insize, lpad, rpad)
	return outdata
Example #13
File: Pad.py Project: rsarbaev/PuzzleLib
def reflectpad2d(data, pad):
	assert data.dtype == np.float32 and data.ndim == 4

	batchsize, maps, inh, inw = data.shape
	upad, bpad, lpad, rpad = pad

	assert inh >= max(upad, bpad) + 1 and inw >= max(lpad, rpad) + 1
	outdata = CPUArray.empty((batchsize, maps, inh + upad + bpad, inw + lpad + rpad), dtype=data.dtype)

	mod.reflectpad2d(outdata.data, data.data, batchsize, maps, inh, inw, upad, bpad, lpad, rpad)
	return outdata
Example #14
File: Pad.py Project: rsarbaev/PuzzleLib
def reflectpad1dTest():
	batchsize, maps, insize = 4, 8, 48
	lpad, rpad = 2, 3

	data = CPUArray.toDevice(np.random.randn(batchsize, maps, insize).astype(np.float32))
	outdata = reflectpad1d(data, pad=(lpad, rpad))

	hostData, hostOutData = data.get(), outdata.get()

	assert np.allclose(hostOutData[:, :, lpad:insize + lpad], hostData)
	assert np.allclose(hostOutData[:, :, :lpad][:, :, ::-1], hostData[:, :, 1:lpad+1])
	assert np.allclose(hostOutData[:, :, insize + lpad:][:, :, ::-1], hostData[:, :, insize - 1 - rpad:insize - 1])
Example #15
def reductionTest():
    data = CPUArray.toDevice(np.random.randn(10).astype(np.float32))

    accumulate = ReductionKernel(np.float32,
                                 neutral="0.0f",
                                 reduceExpr="a + b",
                                 mapExpr="data[i]",
                                 arguments=[(float_t.const.ptr, "data")])

    acc = accumulate(data)

    hostSum = np.sum(data.get())
    assert np.allclose(hostSum, acc.get())
Example #16
def upsample2d(data, scale, mode="nearest"):
	batchsize, maps, inh, inw = data.shape
	hscale, wscale = (scale, scale) if isinstance(scale, int) else scale

	outh, outw = hscale * inh, wscale * inw
	outdata = CPUArray.empty((batchsize, maps, outh, outw), dtype=data.dtype)

	if mode == "nearest":
		nearestMod.upsample2dNearest(outdata.data, data.data, batchsize, maps, inh, inw, hscale, wscale)

	else:
		raise ValueError("Unsupported upsampling mode")

	return outdata
Example #17
def svm(scores, labels, mode, error=None):
    assert scores.dtype == np.float32 and labels.dtype == np.int32
    shape = scores.shape

    grad = CPUArray.empty(shape, dtype=np.float32)
    if error is None:
        error = CPUArray.empty((), dtype=np.float32)

    error.fill(0.0)

    spatialDim = int(np.prod(scores.shape[2:]))
    mapStride = spatialDim * scores.shape[1]

    if mode == "l1":
        krl = svmL1Mod.cost
    elif mode == "l2":
        krl = svmL2Mod.cost
    else:
        raise ValueError("unrecognized svm mode: %s" % mode)

    krl(scores.data, labels.data, mapStride, spatialDim, shape[1], shape[0],
        error.data, grad.data, scores.size)
    return error, grad
Example #18
def batchNorm2d(data,
                scale,
                bias,
                mean,
                var,
                epsilon=1e-5,
                test=False,
                out=None):
    assert data.ndim == scale.ndim == bias.ndim == mean.ndim == var.ndim
    assert test  # this CPU path only implements inference mode with precomputed mean and var

    scale = scale.data / np.sqrt(var.data + epsilon)
    outdata = scale * (data.data - mean.data) + bias.data

    return CPUArray(outdata.shape, outdata.dtype, data=outdata, acquire=True)
Example #19
File: Pad.py Project: rsarbaev/PuzzleLib
def reflectpad2dTest():
	batchsize, maps, inh, inw = 4, 8, 12, 15
	upad, bpad, lpad, rpad = 2, 3, 2, 3

	data = CPUArray.toDevice(np.random.randn(batchsize, maps, inh, inw).astype(np.float32))
	outdata = reflectpad2d(data, pad=(upad, bpad, lpad, rpad))

	hostData, hostOutData = data.get(), outdata.get()

	assert np.allclose(hostOutData[:, :, upad:inh + upad, lpad:inw + lpad], hostData)
	assert np.allclose(hostOutData[:, :, :upad, :lpad][:, :, ::-1, ::-1], hostData[:, :, 1:upad + 1, 1:lpad + 1])
	assert np.allclose(
		hostOutData[:, :, inh + upad:, inw + lpad:][:, :, ::-1, ::-1],
		hostData[:, :, inh - 1 - bpad:inh - 1, inw - 1 - rpad:inw - 1]
	)
Example #20
def crossEntropyTest():
    scores = CPUArray.toDevice(np.random.randn(20, 10, 3).astype(np.float32))
    labels = CPUArray.toDevice(
        np.random.randint(low=0, high=10, size=(20, 3)).astype(np.int32))

    error, grad = crossEntropy(scores, labels)

    def softmax(w):
        e = np.exp(w - np.amax(w))
        dist = e / np.sum(e)
        return dist

    def hostCrossEntropy(smax, target):
        smax = np.moveaxis(smax, 1, -1).reshape(-1, smax.shape[1])
        target = target.flatten()
        err = np.sum(np.log(np.array([smax[i, target[i]] for i in range(smax.shape[0])])))

        return -err / target.size

    def hostCrossEntropyGrad(target, smax):
        return np.array([(target == i) - smax[i] for i in range(smax.shape[0])])

    hostSoftmax = np.apply_along_axis(softmax, 1, scores.get())

    hostGrad = np.vstack([
        hostCrossEntropyGrad(labels.get()[i], hostSoftmax[i]) / scores.shape[0]
        for i in range(scores.shape[0])
    ]).reshape(*hostSoftmax.shape)

    assert np.allclose(hostGrad, grad.get())

    hostError = hostCrossEntropy(hostSoftmax, labels.get())
    assert np.isclose(hostError, error.get() / scores.shape[0])
Example #21
def instanceNorm2d(data, scale, bias, epsilon=1e-5):
    batchsize = data.shape[0]
    if batchsize > 1:
        extscale = CPUArray.toDevice(np.tile(scale.data, (batchsize, 1, 1)))
        extbias = CPUArray.toDevice(np.tile(bias.data, (batchsize, 1, 1)))

    else:
        extscale = scale
        extbias = bias

    indata = data.reshape(1, batchsize * data.shape[1], data.shape[2],
                          data.shape[3])
    mean = CPUArray.empty((1, indata.shape[1], 1, 1), dtype=np.float32)
    var = CPUArray.empty((1, indata.shape[1], 1, 1), dtype=np.float32)

    outdata, savemean, savevar, desc = DNNL.batchNormNd(indata,
                                                        extscale,
                                                        extbias,
                                                        mean,
                                                        var,
                                                        epsilon,
                                                        test=False)
    return outdata.reshape(data.shape), savemean, savevar, extscale, extbias, desc
Example #22
def instanceNorm2dBackward(grad,
                           data,
                           extscale,
                           extbias,
                           savemean,
                           savevar,
                           epsilon,
                           desc,
                           affine=True):
    batchsize, maps = grad.shape[:2]

    outgrad = grad.reshape(1, batchsize * grad.shape[1], grad.shape[2],
                           grad.shape[3])
    indata = data.reshape(1, batchsize * data.shape[1], data.shape[2],
                          data.shape[3])

    ingrad, scalegrad, biasgrad = DNNL.batchNormNdBackward(
        indata, outgrad, extscale, extbias, savemean, savevar, desc, epsilon)

    if affine and batchsize > 1:
        scalegrad = np.sum(scalegrad.data.reshape(batchsize, -1),
                           axis=0).reshape((1, maps, 1, 1))
        biasgrad = np.sum(biasgrad.data.reshape(batchsize, -1),
                          axis=0).reshape((1, maps, 1, 1))

        scalegrad = CPUArray(scalegrad.shape,
                             scalegrad.dtype,
                             data=scalegrad,
                             acquire=True)
        biasgrad = CPUArray(biasgrad.shape,
                            biasgrad.dtype,
                            data=biasgrad,
                            acquire=True)

    return (ingrad.reshape(grad.shape), scalegrad,
            biasgrad) if affine else ingrad.reshape(grad.shape)
Example #23
    def __call__(self, *args, **kwargs):
        if self.module is None:
            source, functions = self.generateSource()
            self.module = SourceModule(source,
                                       functions,
                                       converter=self.paramConverter,
                                       finalizer=self.funcFinalizer,
                                       debug=self.debug)

        acc = self.module.reduction(
            *(arg.data if isinstance(arg, CPUArray) else arg for arg in args))

        result = CPUArray.empty((), self.outtype)
        result.fill(acc)

        return result
Example #24
def sumOnMatrix(A, out=None, cols=True, alpha=1.0, beta=0.0):
    assert A.ndim == 2
    assert A.flags.c_contiguous
    assert A.dtype == np.float32

    if out is None:
        out = CPUArray.empty((A.shape[1], ) if cols else (A.shape[0], ),
                             dtype=np.float32)

    if alpha == 1.0 and beta == 0.0:
        np.sum(A.data, axis=0 if cols else 1, out=out.data)

    else:
        s = np.sum(A.data, axis=0 if cols else 1)
        np.add(beta * out.data, alpha * s, out=out.data)

    return out
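A minimal usage sketch for sumOnMatrix in the same test style; the scaled path follows directly from the code above (out = beta * out + alpha * sum), and the function name below is illustrative only:

def sumOnMatrixSketch():
    A = CPUArray.toDevice(np.random.randn(5, 3).astype(np.float32))

    colsum = sumOnMatrix(A)              # sum down the rows -> one value per column
    rowsum = sumOnMatrix(A, cols=False)  # sum across the columns -> one value per row

    assert np.allclose(np.sum(A.get(), axis=0), colsum.get())
    assert np.allclose(np.sum(A.get(), axis=1), rowsum.get())

    # scaled accumulation: out = beta * out + alpha * sum(A)
    out = CPUArray.toDevice(np.ones((3, ), dtype=np.float32))
    sumOnMatrix(A, out=out, alpha=0.5, beta=2.0)
    assert np.allclose(2.0 + 0.5 * np.sum(A.get(), axis=0), out.get())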
Example #25
def pool2d(data, size=2, stride=2, pad=0, mode=PoolMode.max):
    assert data.ndim == 4
    onRow = np.max if mode == PoolMode.max else np.mean

    batchsize, maps, inh, inw = data.shape
    size, stride, pad = repeatValue(size, 2), repeatValue(stride, 2), repeatValue(pad, 2)

    outh, outw = outshape((inh, inw), size, stride, pad)

    coldata = im2col(data.data.reshape(batchsize * maps, 1, inh, inw), size,
                     stride, pad)
    outdata = onRow(coldata, axis=1, keepdims=True).reshape(
        (batchsize, maps, outh, outw))

    return CPUArray(outdata.shape, outdata.dtype, data=outdata, acquire=True)
Example #26
def conv2d(data, W, bias=None, stride=1, pad=0):
    assert data.ndim == 4 and W.ndim == 4

    batchsize, _, inh, inw = data.shape
    stride, pad = repeatValue(stride, 2), repeatValue(pad, 2)

    outmaps, _, hsize, wsize = W.shape
    outh, outw = outshape((inh, inw), (hsize, wsize), stride, pad)

    coldata = im2col(data.data, W.shape[2:], stride, pad)
    W = W.data.reshape(W.shape[0], -1).T

    bias = bias.data.reshape(1, bias.shape[1]) if bias is not None else None
    outdata = linear(coldata, W, bias)

    outdata = col2im(outdata, outmaps, (outh, outw))
    return CPUArray(outdata.shape, outdata.dtype, data=outdata, acquire=True)
Example #27
def unittest():
	batchsize, maps, inh, inw = 3, 2, 16, 15
	scale = 2

	data = CPUArray.toDevice(np.random.uniform(low=-1.0, high=1.0, size=(batchsize, maps, inh, inw)).astype(np.float32))
	outdata = upsample2d(data, scale, mode="nearest")

	hostData = data.get()
	hostOutData = np.empty(outdata.shape, dtype=np.float32)

	for b in range(batchsize):
		for c in range(maps):
			for y in range(inh):
				for x in range(inw):
					hostOutData[b, c, y * scale:(y + 1) * scale, x * scale:(x + 1) * scale] = hostData[b, c, y, x]

	assert np.allclose(hostOutData, outdata.get())
Example #28
def mulMatrixOnMatrix(A,
                      B,
                      out=None,
                      transpA=False,
                      transpB=False,
                      alpha=1.0,
                      beta=0.0):
    assert not (transpA and transpB)
    assert A.ndim == 2 and B.ndim == 2

    assert A.dtype == B.dtype and A.dtype == np.float32
    assert A.flags.c_contiguous and B.flags.c_contiguous

    if transpA:
        assert A.shape[0] == B.shape[0]
        shape = (A.shape[1], B.shape[1])
    elif transpB:
        assert A.shape[1] == B.shape[1]
        shape = (A.shape[0], B.shape[0])
    else:
        assert A.shape[1] == B.shape[0]
        shape = (A.shape[0], B.shape[1])

    if out is None:
        out = CPUArray.empty(shape, dtype=np.float32)

    if transpA:
        k, m = A.shape
        n = B.shape[1]
        libdnnl.dnnl_sgemm('t', 'n', m, n, k, alpha, A.ptr, m, B.ptr, n, beta,
                           out.ptr, n)
    elif transpB:
        m, k = A.shape
        n = B.shape[0]
        libdnnl.dnnl_sgemm('n', 't', m, n, k, alpha, A.ptr, k, B.ptr, k, beta,
                           out.ptr, n)
    else:
        m, k = A.shape
        n = B.shape[1]
        libdnnl.dnnl_sgemm('n', 'n', m, n, k, alpha, A.ptr, k, B.ptr, n, beta,
                           out.ptr, n)

    return out
Example #29
    def build(self):
        nbytes = 0
        for reg in self.regs:
            shape, dtype, _ = reg
            assert dtype == self.dtype
            nbytes += int(np.prod(shape) * dtype(0).itemsize)

        self.mem = CPUArray.empty((nbytes, ), np.uint8)
        offset = 0

        for shape, dtype, name in self.regs:
            regbytes = int(np.prod(shape) * dtype(0).itemsize)
            assert offset + regbytes <= self.mem.size

            self.blocks[name] = self.mem[offset:offset + regbytes].view(dtype).reshape(shape)
            offset += regbytes

        self.regs.clear()
        self.ary = self.mem.view(dtype=self.dtype)
Example #30
    def recognize(self, audio_path):
        preprocessed_audio = preprocess(audio_path, self.sample_rate, self.window_size, self.window_stride)
        if self.cpu:
            from PuzzleLib.CPU.CPUArray import CPUArray
            inputs = CPUArray.toDevice(np.array([preprocessed_audio]).astype(np.float32))
        else:
            from PuzzleLib.Backend import gpuarray
            inputs = gpuarray.to_gpu(np.array([preprocessed_audio]).astype(np.float16))

        output = self.w2l(inputs).get()
        output = np.vstack(output).astype(np.float32)
        result = self.decoder.decode(output)

        if not self.cpu:
            from PuzzleLib.Backend.gpuarray import memoryPool
            memoryPool.freeHeld()

        del inputs, output

        return result
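A hedged usage sketch for recognize; the enclosing class and its constructor are not part of this snippet, so the instance name and audio path below are placeholders:

# hypothetical driver; `recognizer` stands in for an already constructed instance
# of the class that owns recognize(), and "sample.wav" is a placeholder path
text = recognizer.recognize("sample.wav")
print(text)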