def gru_base(self, create_rnn, ref, outputs_with_grads,
             input_tensor, fwd_only, drop_states, linear_before_reset,
             gc, dc):
    print("GRU test parameters: ", locals())
    t, n, d = input_tensor.shape
    assert d % 3 == 0
    d = d // 3
    ref = partial(ref, drop_states=drop_states,
                  linear_before_reset=linear_before_reset)

    with core.DeviceScope(gc):
        net = _prepare_rnn(
            t, n, d, create_rnn,
            outputs_with_grads=outputs_with_grads,
            memory_optim=False,
            forget_bias=0.0,
            forward_only=fwd_only,
            drop_states=drop_states,
            linear_before_reset=linear_before_reset,
            num_states=1,
        )[1]
    # here we don't provide a real input for the net but just for one of
    # its ops (RecurrentNetworkOp). So have to hardcode this name
    workspace.FeedBlob("test_name_scope/external/recurrent/i2h",
                       input_tensor,
                       device_option=gc)
    op = net._net.op[-1]
    inputs = [workspace.FetchBlob(name) for name in op.input]

    self.assertReferenceChecks(
        gc,
        op,
        inputs,
        ref,
        input_device_options={"test_name_scope/timestep": hu.cpu_do},
        outputs_to_check=list(range(2)),
    )

    # Checking for input, gates_t_w and gates_t_b gradients
    if not fwd_only:
        for param in range(2):
            print("Check param {}".format(param))
            self.assertGradientChecks(
                device_option=gc,
                op=op,
                inputs=inputs,
                outputs_to_check=param,
                outputs_with_grads=outputs_with_grads,
                threshold=0.001,
                stepsize=0.005,
                input_device_options={
                    "test_name_scope/timestep": hu.cpu_do
                },
            )
def test_layered_lstm(self, input_tensor, **kwargs):
    for outputs_with_grads in [[0], [1], [0, 1, 2, 3]]:
        for memory_optim in [False, True]:
            _, net, inputs = _prepare_rnn(
                *input_tensor.shape,
                create_rnn=rnn_cell.LSTM,
                outputs_with_grads=outputs_with_grads,
                memory_optim=memory_optim,
                **kwargs
            )
            workspace.FeedBlob(inputs[-1], input_tensor)
            workspace.RunNetOnce(net)
            workspace.ResetWorkspace()
def lstm_base(self, lstm_type, outputs_with_grads, memory_optim,
              input_tensor, forget_bias, fwd_only, drop_states):
    print("LSTM test parameters: ", locals())
    create_lstm, ref = lstm_type
    ref = partial(ref, forget_bias=forget_bias)
    t, n, d = input_tensor.shape
    assert d % 4 == 0
    d = d // 4
    ref = partial(ref, forget_bias=forget_bias, drop_states=drop_states)

    net = _prepare_rnn(t, n, d, create_lstm,
                       outputs_with_grads=outputs_with_grads,
                       memory_optim=memory_optim,
                       forget_bias=forget_bias,
                       forward_only=fwd_only,
                       drop_states=drop_states)[1]
    # here we don't provide a real input for the net but just for one of
    # its ops (RecurrentNetworkOp). So have to hardcode this name
    workspace.FeedBlob("test_name_scope/external/recurrent/i2h", input_tensor)
    op = net._net.op[-1]
    inputs = [workspace.FetchBlob(name) for name in op.input]

    # Validate forward only mode is in effect
    if fwd_only:
        for arg in op.arg:
            self.assertFalse(arg.name == 'backward_step_net')

    self.assertReferenceChecks(
        hu.cpu_do,
        op,
        inputs,
        ref,
        outputs_to_check=list(range(4)),
    )

    # Checking for input, gates_t_w and gates_t_b gradients
    if not fwd_only:
        for param in range(5):
            self.assertGradientChecks(
                device_option=hu.cpu_do,
                op=op,
                inputs=inputs,
                outputs_to_check=param,
                outputs_with_grads=outputs_with_grads,
                threshold=0.01,
                stepsize=0.005,
            )
def test_unroll_lstm(self, input_tensor, dim_out, outputs_with_grads,
                     **kwargs):
    lstms = [
        _prepare_rnn(
            *input_tensor.shape,
            create_rnn=rnn_cell.LSTM,
            outputs_with_grads=outputs_with_grads,
            T=T,
            two_d_initial_states=False,
            dim_out=dim_out,
            **kwargs
        )
        for T in [input_tensor.shape[0], None]
    ]
    outputs, nets, inputs = zip(*lstms)
    workspace.FeedBlob(inputs[0][-1], input_tensor)
    assert inputs[0] == inputs[1]
    gradient_checker.NetGradientChecker.CompareNets(
        nets, outputs, outputs_with_grads,
        inputs_with_grads=inputs[0],
    )