Exemplo n.º 1
0
def inference_gru_block_vs_gru_cell(batch_size,
                                    cell_size,
                                    input_size,
                                    time_steps,
                                    use_gpu=False,
                                    iters=30):
    """Compare forward-pass timings of GRUCell and GRUBlockCell.

    Both cells are unrolled with ``rnn.dynamic_rnn`` over the same time-major
    input variable and the same initial-state variable, inside a fresh graph
    and session. One CSV line with the configuration, both timings and the
    relative time saved by the block cell (in percent) is printed to stdout.

    Args:
      batch_size: Batch dimension of the input and state variables.
      cell_size: Number of units handed to each cell constructor.
      input_size: Last dimension of the input variable.
      time_steps: Leading (time) dimension of the input variable.
      use_gpu: Forwarded to ``benchmarking.device``.
      iters: Forwarded to ``benchmarking.seconds_per_run``.

    Returns:
      A ``(basic_time, block_time)`` tuple with the values reported by
      ``benchmarking.seconds_per_run`` for GRUCell and GRUBlockCell.
    """
    ops.reset_default_graph()
    with session.Session(graph=ops.Graph()) as sess:
        with benchmarking.device(use_gpu):

            # Fixed seed shared by the TF initializer and numpy.
            rng_seed = 1994
            uniform_init = init_ops.random_uniform_initializer(
                -1, 1, seed=rng_seed)
            np.random.seed(rng_seed)

            # Variables fed to both cell implementations.
            inputs_tm = vs.get_variable(
                "concat_x", [time_steps, batch_size, input_size])
            initial_h = vs.get_variable("h", [batch_size, cell_size])

            def _seconds_per_inference(scope_name, cell_cls):
                # Build the unrolled RNN for one cell type and time it.
                with vs.variable_scope(scope_name, initializer=uniform_init):
                    rnn_outputs, _ = rnn.dynamic_rnn(
                        cell_cls(cell_size),
                        inputs=inputs_tm,
                        initial_state=initial_h,
                        time_major=True,
                        dtype=dtypes.float32)
                    sess.run([variables.global_variables_initializer()])
                    return benchmarking.seconds_per_run(
                        rnn_outputs, sess, iters)

            # The basic implementation is measured first, then the block one.
            basic_time_inference = _seconds_per_inference(
                "basic", rnn_cell.GRUCell)
            block_time_inference = _seconds_per_inference(
                "block", gru_ops.GRUBlockCell)

        # Percentage of the basic cell's time saved by the block cell.
        time_saved = basic_time_inference - block_time_inference
        performance_inference = time_saved * 100 / basic_time_inference

        csv_fields = (batch_size, cell_size, input_size, time_steps, use_gpu,
                      basic_time_inference, block_time_inference,
                      performance_inference)
        print(",".join(map(str, csv_fields)))

        return basic_time_inference, block_time_inference
Exemplo n.º 2
0
    def benchmarkLSTMBlockCellFpropWithDynamicRNN(self):
        """Time LSTMBlockCell forward passes driven by ``rnn.dynamic_rnn``.

        Iterates over every configuration yielded by
        ``benchmarking.dict_product`` for the batch sizes, cell sizes, devices
        and dtypes listed below. For each configuration a fresh graph is
        built, the cell is unrolled over a time-major input variable whose
        feature dimension equals ``cell_size``, and the result of
        ``benchmarking.seconds_per_run`` is printed as a CSV row and passed to
        ``self.report_benchmark``.
        """
        print("BlockLSTMCell forward propagation via dynamic_rnn().")
        print("--------------------------------------------------------------")
        print("LSTMBlockCell Seconds per inference.")
        # The header lists the same seven columns, in the same order, as the
        # rows printed in the loop (dtype comes first; cell_size is emitted
        # twice because it also serves as the input size).
        print("dtype,batch_size,cell_size,input_size,time_steps,use_gpu,"
              "wall_time")
        iters = 10
        for config in benchmarking.dict_product({
                "batch_size": [1, 8, 13, 32, 67, 128],
                "cell_size": [128, 250, 512, 650, 1024, 1350],
                "time_steps": [40],
                "use_gpu": [True, False],
                "dtype": ["float32", "float16"],
        }):
            # Map the dtype name in the config onto the TF dtype object.
            dtype = dtypes.float32 if config[
                "dtype"] == "float32" else dtypes.float16
            with ops.Graph().as_default():
                with benchmarking.device(use_gpu=config["use_gpu"]):
                    # Input is [time_steps, batch_size, cell_size].
                    inputs = variable_scope.get_variable(
                        "x",
                        dtype=dtype,
                        shape=[
                            config["time_steps"], config["batch_size"],
                            config["cell_size"]
                        ])
                    cell = lstm_ops.LSTMBlockCell(config["cell_size"],
                                                  dtype=dtype)
                    # Whatever dynamic_rnn returns is fetched as a whole by
                    # the timing helper below.
                    outputs = rnn.dynamic_rnn(cell,
                                              inputs,
                                              time_major=True,
                                              dtype=dtype)
                    init_op = variables.global_variables_initializer()

                with session.Session() as sess:
                    sess.run(init_op)
                    wall_time = benchmarking.seconds_per_run(
                        outputs, sess, iters)

                # Print to stdout. If the TEST_REPORT_FILE_PREFIX environment variable
                # is set, this will produce a copy-paste-able CSV file.
                print(",".join(
                    map(str, [
                        config["dtype"], config["batch_size"],
                        config["cell_size"], config["cell_size"],
                        config["time_steps"], config["use_gpu"], wall_time
                    ])))
                # The benchmark name encodes the full configuration; IS reuses
                # cell_size for the same reason as the CSV row above.
                benchmark_name_template = "_".join([
                    "LSTMBlockCell_fprop", "DT_%(dtype)s", "BS%(batch_size)i",
                    "CS%(cell_size)i", "IS%(cell_size)i", "TS%(time_steps)i",
                    "gpu_%(use_gpu)s"
                ])

                self.report_benchmark(name=benchmark_name_template % config,
                                      iters=iters,
                                      wall_time=wall_time,
                                      extras=config)
Exemplo n.º 3
0
def single_bprop_step_gru_block_vs_gru_cell(batch_size,
                                            cell_size,
                                            input_size,
                                            use_gpu=False,
                                            iters=30):
    """Compare single-step gradient timings of GRUCell and GRUBlockCell.

    Each cell is applied once to identity-wrapped copies of the input and
    state variables, and the gradient of the cell result with respect to the
    state variable is timed. One CSV line with the configuration, both timings
    and the relative time saved by the block cell (in percent) is printed to
    stdout.

    Args:
      batch_size: Leading dimension of the input and state variables.
      cell_size: Number of units handed to each cell constructor.
      input_size: Last dimension of the input variable.
      use_gpu: Forwarded to ``benchmarking.device``.
      iters: Forwarded to ``benchmarking.seconds_per_run``.

    Returns:
      A ``(basic_time, block_time)`` tuple with the values reported by
      ``benchmarking.seconds_per_run`` for GRUCell and GRUBlockCell.
    """
    ops.reset_default_graph()
    with session.Session(graph=ops.Graph()) as sess:
        with benchmarking.device(use_gpu):
            uniform_init = init_ops.random_uniform_initializer(
                -1, 1, seed=1989)

            # Variables fed to both cell implementations.
            x_var = vs.get_variable("x", [batch_size, input_size])
            h_var = vs.get_variable("h", [batch_size, cell_size])

            def _seconds_per_bprop(scope_name, cell_cls):
                # Build one cell step plus its gradient w.r.t. h and time it.
                with vs.variable_scope(scope_name, initializer=uniform_init):
                    cell_result = cell_cls(cell_size)(
                        array_ops.identity(x_var),
                        array_ops.identity(h_var))
                    sess.run([variables.global_variables_initializer()])
                    grads_wrt_state = gradients_impl.gradients(
                        [cell_result], h_var)
                    return benchmarking.seconds_per_run(
                        grads_wrt_state, sess, iters)

            # The basic implementation is measured first, then the block one.
            basic_time_bprop = _seconds_per_bprop("basic", rnn_cell.GRUCell)
            block_time_bprop = _seconds_per_bprop("block",
                                                  gru_ops.GRUBlockCell)

    # Percentage of the basic cell's time saved by the block cell.
    time_saved = basic_time_bprop - block_time_bprop
    performance_bprop = time_saved * 100 / basic_time_bprop

    csv_fields = (batch_size, cell_size, input_size, use_gpu,
                  basic_time_bprop, block_time_bprop, performance_bprop)
    print(",".join(map(str, csv_fields)))

    return basic_time_bprop, block_time_bprop
Exemplo n.º 4
0
def training_gru_block_vs_gru_cell(batch_size,
                                   cell_size,
                                   input_size,
                                   time_steps,
                                   use_gpu=False,
                                   iters=30):
    """Compare training-step timings of GRUCell and GRUBlockCell.

    Each cell is unrolled with ``rnn.dynamic_rnn`` over the same time-major
    input and initial-state variables. The timed op is a gradient-descent
    ``minimize`` step on the mean squared difference between the RNN outputs
    and a target variable. One CSV line with the configuration, both timings
    and the relative time saved by the block cell (in percent) is printed to
    stdout.

    Args:
      batch_size: Batch dimension of the input, state and target variables.
      cell_size: Number of units handed to each cell constructor.
      input_size: Last dimension of the input variable.
      time_steps: Leading (time) dimension of the input and target variables.
      use_gpu: Forwarded to ``benchmarking.device``.
      iters: Forwarded to ``benchmarking.seconds_per_run``.

    Returns:
      A ``(basic_time, block_time)`` tuple with the values reported by
      ``benchmarking.seconds_per_run`` for GRUCell and GRUBlockCell.
    """
    ops.reset_default_graph()
    with session.Session(graph=ops.Graph()) as sess:
        # Place all ops on the device selected by use_gpu.
        with benchmarking.device(use_gpu):

            # Fixed seed shared by the TF initializer and numpy.
            rng_seed = 1994
            uniform_init = init_ops.random_uniform_initializer(
                -1, 1, seed=rng_seed)
            np.random.seed(rng_seed)

            # Variables fed to both cell implementations.
            inputs_tm = vs.get_variable(
                "concat_x", [time_steps, batch_size, input_size])
            initial_h = vs.get_variable("h", [batch_size, cell_size])
            targets = vs.get_variable(
                "y", [time_steps, batch_size, cell_size])

            def _seconds_per_training_step(scope_name, cell_cls):
                # Build RNN, loss and train op for one cell type and time it.
                with vs.variable_scope(scope_name, initializer=uniform_init):
                    rnn_outputs, _ = rnn.dynamic_rnn(
                        cell_cls(cell_size),
                        inputs=inputs_tm,
                        initial_state=initial_h,
                        time_major=True,
                        dtype=dtypes.float32)
                    sess.run([variables.global_variables_initializer()])
                    squared_error = math_ops.square(rnn_outputs - targets)
                    loss = math_ops.reduce_mean(squared_error)
                    step_size = 0.01
                    sgd = gradient_descent.GradientDescentOptimizer(step_size)
                    train_op = sgd.minimize(loss)

                    # Time for a training step.
                    return benchmarking.seconds_per_run(train_op, sess, iters)

            # The basic implementation is measured first, then the block one.
            basic_time_training = _seconds_per_training_step(
                "basic", rnn_cell.GRUCell)
            block_time_training = _seconds_per_training_step(
                "block", gru_ops.GRUBlockCell)

        # Percentage of the basic cell's time saved by the block cell.
        time_saved = basic_time_training - block_time_training
        performance_training = time_saved * 100 / basic_time_training

        csv_fields = (batch_size, cell_size, input_size, time_steps, use_gpu,
                      basic_time_training, block_time_training,
                      performance_training)
        print(",".join(map(str, csv_fields)))

        return basic_time_training, block_time_training