def benchmarkSingleBpropStepBlockGRUVsGRUCell(self): print("--------------------------------------------------------------") print("Single bprop step speed GRUBlockCell vs GRUCell") print("batch_size, cell_size, input_size, GPU, basic_time, " "block_time, performance_inference[%]") iters = 10 for config in benchmarking.dict_product({ "use_gpu": [True, False], "batch_size": [1, 32, 128], "cell_size": [128, 512], "input_size": [128, 512] }): basic_time, block_time = single_bprop_step_gru_block_vs_gru_cell( config["batch_size"], config["cell_size"], config["input_size"], config["use_gpu"], iters) self.report_benchmark( name="GRUCell_Bprop_single_step_time_BS%i_CS%i_IS%i_gpu_%s" % (config["batch_size"], config["cell_size"], config["input_size"], config["use_gpu"]), iters=iters, wall_time=basic_time) self.report_benchmark( name="GRUBlockCell_Bprop_single_step_time_BS%i_CS%i_IS%i_gpu_%s" % (config["batch_size"], config["cell_size"], config["input_size"], config["use_gpu"]), iters=iters, wall_time=block_time) print("--------------------------------------------------------------")
def benchmarkTrainingBlockGRUVsGRUCell(self): print("Comparison GRUBlockCell vs GRUCell") print("--------------------------------------------------------------") print("Training speed GRUBlockCell vs GRUCell") print("batch_size, cell_size, input_size, time_steps, GPU, " "basic_time_training, block_time_training, performance_training[%]") iters = 10 for config in benchmarking.dict_product({ "use_gpu": [True, False], "batch_size": [1, 32, 128], "cell_size": [128, 512], "input_size": [128, 512], "time_steps": [50] }): basic_time, block_time = training_gru_block_vs_gru_cell( config["batch_size"], config["cell_size"], config["input_size"], config["time_steps"], config["use_gpu"], iters) self.report_benchmark( name="GRUCell_training_time_BS%i_CS%i_IS%i_TS%i_gpu_%s" % (config["batch_size"], config["cell_size"], config["input_size"], config["time_steps"], config["use_gpu"]), iters=iters, wall_time=basic_time) self.report_benchmark( name="GRUBlockCell_training_time_BS%i_CS%i_IS%i_TS%i_gpu_%s" % (config["batch_size"], config["cell_size"], config["input_size"], config["time_steps"], config["use_gpu"]), iters=iters, wall_time=block_time)
def benchmarkLSTMBlockCellBpropWithDynamicRNN(self): print("BlockLSTMCell backward propagation via dynamic_rnn().") print("--------------------------------------------------------------") print("LSTMBlockCell Seconds per inference.") print("batch_size,cell_size,input_size,time_steps,use_gpu,wall_time") iters = 10 for config in benchmarking.dict_product({ "batch_size": [1, 8, 13, 32, 67, 128], "cell_size": [128, 250, 512, 650, 1024, 1350], "time_steps": [40], "use_gpu": [True, False] }): with ops.Graph().as_default(): with benchmarking.device(use_gpu=config["use_gpu"]): time_steps = config["time_steps"] batch_size = config["batch_size"] cell_size = input_size = config["cell_size"] inputs = variable_scope.get_variable( "x", [time_steps, batch_size, cell_size], trainable=False, dtype=dtypes.float32) with variable_scope.variable_scope( "rnn", reuse=variable_scope.AUTO_REUSE): w = variable_scope.get_variable( "rnn/lstm_cell/kernel", shape=[input_size + cell_size, cell_size * 4], dtype=dtypes.float32) b = variable_scope.get_variable( "rnn/lstm_cell/bias", shape=[cell_size * 4], dtype=dtypes.float32, initializer=init_ops.zeros_initializer()) cell = lstm_ops.LSTMBlockCell(cell_size) outputs = rnn.dynamic_rnn( cell, inputs, time_major=True, dtype=dtypes.float32) grads = gradients_impl.gradients(outputs, [inputs, w, b]) init_op = variables.global_variables_initializer() with session.Session() as sess: sess.run(init_op) wall_time = benchmarking.seconds_per_run(grads, sess, iters) # Print to stdout. If the TEST_REPORT_FILE_PREFIX environment variable # is set, this will produce a copy-paste-able CSV file. print(",".join( map(str, [ batch_size, cell_size, cell_size, time_steps, config["use_gpu"], wall_time ]))) benchmark_name_template = "_".join([ "LSTMBlockCell_bprop", "BS%(batch_size)i", "CS%(cell_size)i", "IS%(cell_size)i", "TS%(time_steps)i", "gpu_%(use_gpu)s" ]) self.report_benchmark( name=benchmark_name_template % config, iters=iters, wall_time=wall_time, extras=config)
def benchmarkLSTMBlockCellFpropWithDynamicRNN(self): print("BlockLSTMCell forward propagation via dynamic_rnn().") print("--------------------------------------------------------------") print("LSTMBlockCell Seconds per inference.") print("batch_size,cell_size,input_size,time_steps,use_gpu,wall_time") iters = 10 for config in benchmarking.dict_product({ "batch_size": [1, 8, 13, 32, 67, 128], "cell_size": [128, 250, 512, 650, 1024, 1350], "time_steps": [40], "use_gpu": [True, False], "dtype": ["float32", "float16"], }): dtype = dtypes.float32 if config[ "dtype"] == "float32" else dtypes.float16 with ops.Graph().as_default(): with benchmarking.device(use_gpu=config["use_gpu"]): inputs = variable_scope.get_variable( "x", dtype=dtype, shape=[ config["time_steps"], config["batch_size"], config["cell_size"] ]) cell = lstm_ops.LSTMBlockCell(config["cell_size"], dtype=dtype) outputs = rnn.dynamic_rnn(cell, inputs, time_major=True, dtype=dtype) init_op = variables.global_variables_initializer() with session.Session() as sess: sess.run(init_op) wall_time = benchmarking.seconds_per_run( outputs, sess, iters) # Print to stdout. If the TEST_REPORT_FILE_PREFIX environment variable # is set, this will produce a copy-paste-able CSV file. print(",".join( map(str, [ config["dtype"], config["batch_size"], config["cell_size"], config["cell_size"], config["time_steps"], config["use_gpu"], wall_time ]))) benchmark_name_template = "_".join([ "LSTMBlockCell_fprop", "DT_%(dtype)s", "BS%(batch_size)i", "CS%(cell_size)i", "IS%(cell_size)i", "TS%(time_steps)i", "gpu_%(use_gpu)s" ]) self.report_benchmark(name=benchmark_name_template % config, iters=iters, wall_time=wall_time, extras=config)
def benchmarkLSTMBlockCellFpropWithDynamicRNN(self): print("BlockLSTMCell forward propagation via dynamic_rnn().") print("--------------------------------------------------------------") print("LSTMBlockCell Seconds per inference.") print("batch_size,cell_size,input_size,time_steps,use_gpu,wall_time") iters = 10 for config in benchmarking.dict_product({ "batch_size": [1, 8, 13, 32, 67, 128], "cell_size": [128, 250, 512, 650, 1024, 1350], "time_steps": [40], "use_gpu": [True, False], "dtype": ["float32", "float16"], }): dtype = dtypes.float32 if config["dtype"] == "float32" else dtypes.float16 with ops.Graph().as_default(): with benchmarking.device(use_gpu=config["use_gpu"]): inputs = variable_scope.get_variable( "x", dtype=dtype, shape=[ config["time_steps"], config["batch_size"], config["cell_size"] ]) cell = lstm_ops.LSTMBlockCell(config["cell_size"], dtype=dtype) outputs = rnn.dynamic_rnn(cell, inputs, time_major=True, dtype=dtype) init_op = variables.global_variables_initializer() with session.Session() as sess: sess.run(init_op) wall_time = benchmarking.seconds_per_run(outputs, sess, iters) # Print to stdout. If the TEST_REPORT_FILE_PREFIX environment variable # is set, this will produce a copy-paste-able CSV file. print(",".join( map(str, [ config["dtype"], config["batch_size"], config["cell_size"], config["cell_size"], config["time_steps"], config["use_gpu"], wall_time ]))) benchmark_name_template = "_".join([ "LSTMBlockCell_fprop", "DT_%(dtype)s", "BS%(batch_size)i", "CS%(cell_size)i", "IS%(cell_size)i", "TS%(time_steps)i", "gpu_%(use_gpu)s" ]) self.report_benchmark( name=benchmark_name_template % config, iters=iters, wall_time=wall_time, extras=config)