import numpy as np
import tensorflow as tf
import blocksparse.ewops as ew  # fused elementwise ops (float_cast, bias_relu); import path assumed

def testBiasRelu(self):

        config = tf.ConfigProto(
            intra_op_parallelism_threads=1,
            inter_op_parallelism_threads=1)

        with self.test_session(config=config) as sess:
            for shape in shapes:

                # shape[0] //= 24
                # shape[0]  *= 512

                if ones:
                    cpuX = np.ones(shape, dtype=np.float32)
                    cpuE = np.ones(shape, dtype=np.float32)
                    cpuB = np.ones(shape[1:], dtype=p.float32)
                else:
                    cpuX = np.random.uniform(-1.0, 1.0, shape).astype(np.float16).astype(np.float32)
                    cpuE = np.random.uniform(-1.0, 1.0, shape).astype(np.float16).astype(np.float32)
                    cpuB = np.random.uniform(-1.0, 1.0, shape[1:]).astype(np.float32)

                for relu in (True, False):
                    for dtype in (tf.float32, ):  #tf.float16, tf.bfloat16

                        results = []
                        for device in ("gpu", "cpu"):
                            if bench and device == "cpu":
                                break

                            cast = device == "gpu" and dtype is not tf.float32

                            with tf.device("/%s:0" % device), tf.name_scope(device):

                                x = tf.placeholder(tf.float32, cpuX.shape)
                                e = tf.placeholder(tf.float32, cpuE.shape)
                                b = tf.placeholder(tf.float32, cpuB.shape)

                                feed_dict = { x: cpuX, e: cpuE, b:cpuB }

                                xc = ew.float_cast(x, dtype=dtype) if cast else x

                                y = ew.bias_relu(xc, b, relu=relu, atomics=atomics, bench=bench)

                                if cast:
                                    y = ew.float_cast(y, dtype=tf.float32)

                                dx, db = tf.gradients(y, [x, b], e)

                                results.append( sess.run( [ y, dx, db ], feed_dict ) )

                        if not bench:
                            for op, dev, cpu in zip(["y", "dx", "db"], results[0], results[1]):

                                dif     = np.abs(cpu - dev)
                                avgval  = np.average(abs(cpu))
                                maxdif  = dif.max()
                                max_err = maxdif if avgval == 0 else maxdif / avgval
                                l2_err  = np.sqrt(np.square(dif).sum()) / np.sqrt(np.square(cpu).sum())

                                print("%s, shape:%14s, op:%3s(%d), err:%17.12f, l2_err:%17.12f" % (dtype.name, str(cpu.shape), op, relu, maxdif, l2_err))
Example #2
def conv1d(x, scope, nf, relu=False):
    with tf.variable_scope(scope):
        nx    = x.shape[-1].value
        ndims = x.shape.ndims

        w = tf.get_variable("w", [nx, nf], initializer=tf.random_normal_initializer(stddev=0.02))
        b = tf.get_variable("b", [    nf], initializer=tf.constant_initializer(0.0))

        # merge context and batch dims for more efficient matmul
        if ndims > 2:
            y_shape = tf.concat([tf.shape(x)[: ndims - 1], [nf]], axis=0)
            x = tf.reshape(x, [-1, nx])

        # avoid atomics in bias grad, but be careful as tf handles temp memory badly in the presence of async ops like all-reduce
        y = bias_relu(tf.matmul(x, fp16(w)), b, relu=relu, atomics=False)

        if ndims > 2:
            y = tf.reshape(y, y_shape)

        return y
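A minimal usage sketch of this conv1d (shapes and scope names are illustrative; fp16 and bias_relu are fused helpers defined elsewhere in the module):

# Hypothetical MLP block built from the conv1d above.
h = tf.placeholder(tf.float32, [8, 128, 1024])   # [batch, ctx, n_state]
with tf.variable_scope("mlp"):
    h2 = conv1d(h,  "fc",   4096, relu=True)     # expand with fused bias + relu
    y  = conv1d(h2, "proj", 1024)                # project back, bias only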
Example #3
def conv1d(x, scope, nf, hps, w_init=tf.random_normal_initializer(stddev=0.02), b_init=tf.constant_initializer(0), relu=False):
    with tf.variable_scope(scope):
        nx    = x.shape[-1].value
        ndims = x.shape.ndims

        w = tf.get_variable("w", [nx, nf], initializer=w_init)
        b = tf.get_variable("b", [    nf], initializer=b_init)

        if ndims > 2:
            y_shape = tf.concat([tf.shape(x)[ : ndims-1], [nf]], axis=0)
            x = tf.reshape(x, [-1, nx])

        scope = tf.get_variable_scope().name
        w = quantize_pre(w, name=scope+"/pre_w", tag=hps.tag)
        x = quantize_pre(x, name=scope+"/pre_x", tag=hps.tag)
        y = tf.matmul(x, w)
        y = quantize_post(y, name=scope+"/post_x", tag=hps.tag)
        y = bias_relu(y, b, relu=relu)

        if ndims > 2:
            y = tf.reshape(y, y_shape)

        return y
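And a hedged usage sketch for the quantized variant; hps only needs to expose a .tag attribute, which quantize_pre/quantize_post use to group their statistics:

# Hypothetical hyperparameter stub and call into the quantized conv1d above.
class HParamsStub(object):
    tag = "train"   # illustrative tag for the quantization bookkeeping

h = tf.placeholder(tf.float32, [8, 128, 1024])   # [batch, ctx, n_state]
y = conv1d(h, "attn_proj", nf=1024, hps=HParamsStub(), relu=False)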