def test_mul(self, T, n, d):
    """Exercise simple_rnn with a one-op recurrent cell that multiplies
    the per-timestep input into the running state."""
    # Outer network: holds the T-step sequence input and the initial state.
    outer = ModelHelperBase(name='external')
    seq_in, init_in = outer.net.AddExternalInputs(
        'input', 'initial_input')
    # Step (cell) network: new_state = input_t * previous_state.
    cell = ModelHelperBase(name='step', param_model=outer)
    x_t, h_prev = cell.net.AddExternalInput(
        'input_t', 'output_t_prev')
    h_t = cell.net.Mul([x_t, h_prev])
    cell.net.AddExternalOutput(h_t)
    # Delegate the actual unrolling / checking to the shared harness.
    self.simple_rnn(T, n, d, outer, cell, x_t, h_t, h_prev,
                    seq_in, init_in)
def test_dropout(self, n, m, gc, dc):
    """Run the Dropout helper on a random blob and check its output.

    Bug fix: the original fetched the input blob "x" — which is trivially
    equal to the array just fed — so the assertion never looked at the
    Dropout output at all. Fetch the helper's output blob "out" instead.

    NOTE(review): asserting the output equals X assumes the Dropout
    helper runs as an identity (test-mode) pass — confirm
    model_helpers.Dropout's defaults before relying on this.
    """
    X = np.random.rand(n, m).astype(np.float32) - 0.5
    workspace.FeedBlob("x", X)
    model = ModelHelperBase(name="test_model")
    out = model_helpers.Dropout(model, "x", "out")
    workspace.RunNetOnce(model.param_init_net)
    workspace.RunNetOnce(model.net)
    # Fetch the helper's output, not the input blob we just fed.
    out = workspace.FetchBlob("out")
    np.testing.assert_equal(X, out)
def test_fc(self, n, m, k, gc, dc):
    """Smoke-test the FC helper family by chaining all four variants
    and running the resulting net once."""
    data = np.random.rand(m, k).astype(np.float32) - 0.5
    workspace.FeedBlob("x", data)
    model = ModelHelperBase(name="test_model")
    # First FC maps k -> n; every subsequent variant maps n -> n.
    blob = model_helpers.FC(model, "x", "out_1", k, n)
    for helper, out_name in (
        (model_helpers.PackedFC, "out_2"),
        (model_helpers.FC_Decomp, "out_3"),
        (model_helpers.FC_Prune, "out_4"),
    ):
        blob = helper(model, blob, out_name, n, n)
    workspace.RunNetOnce(model.param_init_net)
    workspace.RunNetOnce(model.net)
def test_mul_rnn(self, T, n, d):
    """Check recurrent_net against a hand-written forward and backward
    reference for a cell that multiplies the input into the state.

    The recurrence is h_t = h_{t-1} * x_t with h_0 = 1, so the output at
    step t is the running elementwise product of the first t inputs.
    """
    model = ModelHelperBase(name='external')
    # Initial hidden state: all ones, shape (1, n, d).
    one_blob = model.param_init_net.ConstantFill(
        [], value=1.0, shape=[1, n, d])
    input_blob = model.net.AddExternalInput('input')
    # Step net computing output_t = input_t * output_{t-1}.
    step = ModelHelperBase(name='step', param_model=model)
    input_t, output_t_prev = step.net.AddExternalInput(
        'input_t', 'output_t_prev')
    output_t = step.net.Mul([input_t, output_t_prev])
    step.net.AddExternalOutput(output_t)
    recurrent.recurrent_net(
        net=model.net,
        cell_net=step.net,
        inputs=[(input_t, input_blob)],
        initial_cell_inputs=[(output_t_prev, one_blob)],
        links={output_t_prev: output_t},
        scope="test_mul_rnn",
    )
    workspace.FeedBlob(
        str(input_blob), np.random.randn(T, n, d).astype(np.float32))
    workspace.RunNetOnce(model.param_init_net)
    # The RecurrentNetwork op is the last op recurrent_net appended.
    op = model.net._net.op[-1]

    def reference(input, initial_input):
        # Forward: running elementwise product over the time axis.
        # Returns (all T states, last state reshaped to (1, n, d)).
        recurrent_input = initial_input
        result = np.zeros(shape=input.shape)
        for t_cur in range(T):
            recurrent_input = recurrent_input * input[t_cur]
            result[t_cur] = recurrent_input
        shape = list(input.shape)
        shape[0] = 1
        return (result, result[-1].reshape(shape))

    def grad_reference(output_grad, ref_output, inputs):
        # Backward pass, walking time in reverse.  right_grad accumulates
        # the gradient flowing back through the recurrent (state) link.
        input = inputs[0]
        output = ref_output[0]
        initial_input = inputs[1]
        input_grad = np.zeros(shape=input.shape)
        right_grad = 0
        for t_cur in range(T - 1, -1, -1):
            # State entering step t: previous output, or h_0 at t == 0.
            prev_output = output[t_cur - 1] if t_cur > 0 else initial_input
            # d/d(input_t) of (input_t * prev) = prev.
            input_grad[t_cur] = (output_grad[t_cur] + right_grad) * prev_output
            # d/d(prev) of (input_t * prev) = input_t; carry it back a step.
            right_grad = input[t_cur] * (output_grad[t_cur] + right_grad)
        return (input_grad, right_grad.reshape([1, n, d]))

    self.assertReferenceChecks(
        device_option=hu.cpu_do,
        op=op,
        inputs=[
            workspace.FetchBlob(name) for name in [input_blob, one_blob]
        ],
        reference=reference,
        grad_reference=grad_reference,
        output_to_grad=op.output[0],
        outputs_to_check=[0, 1],
    )
def test_lstm(self, t, n, d):
    """Check recurrent.LSTM against a NumPy reference implementation,
    then gradient-check the input and the gate weight/bias parameters.

    The input blob carries pre-computed input projections of width 4*d
    (one slice per LSTM gate); the reference only applies the recurrent
    (hidden-to-gates) transform and the per-step lstm_unit.
    """
    model = ModelHelperBase(name='external')
    input_blob, seq_lengths, hidden_init, cell_init = (
        model.net.AddExternalInputs('input_blob', 'seq_lengths',
                                    'hidden_init', 'cell_init'))
    recurrent.LSTM(model, input_blob, seq_lengths,
                   (hidden_init, cell_init), d, d,
                   scope="external/recurrent")
    # The RecurrentNetwork op is the last op LSTM appended to the net.
    op = model.net._net.op[-1]

    def extract_param_name(model, param_substr):
        # Find the unique model parameter whose name contains the substring.
        result = []
        for p in model.params:
            if param_substr in str(p):
                result.append(str(p))
        assert len(result) == 1
        return result[0]

    # Resolve the scoped blob names of the recurrent gate weights/bias.
    gates = {
        gate: extract_param_name(model, gate)
        for gate in ["gates_t_b", "gates_t_w"]
    }
    workspace.RunNetOnce(model.param_init_net)

    def reference(input, hidden_input, cell_input,
                  gates_w, gates_b, seq_lengths):
        # NumPy forward pass; `input` already holds the input-to-gates
        # projections, so only the hidden-to-gates product is added here.
        T = input.shape[0]
        N = input.shape[1]
        G = input.shape[2]
        D = hidden_input.shape[2]
        # Index 0 holds the initial state; steps fill slots 1..T.
        hidden = np.zeros(shape=(T + 1, N, D))
        cell = np.zeros(shape=(T + 1, N, D))
        assert hidden.shape[0] == T + 1
        assert cell.shape[0] == T + 1
        assert hidden.shape[1] == N
        assert cell.shape[1] == N
        cell[0, :, :] = cell_input
        hidden[0, :, :] = hidden_input
        for t in range(T):
            timestep = np.asarray([t]).astype(np.int32)
            input_t = input[t].reshape(1, N, G)
            hidden_t_prev = hidden[t].reshape(1, N, D)
            cell_t_prev = cell[t].reshape(1, N, D)
            gates = np.dot(hidden_t_prev, gates_w.T) + gates_b
            gates = gates + input_t
            # lstm_unit (defined elsewhere in this file) applies the gate
            # nonlinearities and respects per-sequence lengths.
            hidden_t, cell_t = lstm_unit(cell_t_prev, gates,
                                         seq_lengths, timestep)
            hidden[t + 1] = hidden_t
            cell[t + 1] = cell_t
        # Match the op's outputs: all hidden states, final hidden,
        # all cell states, final cell.
        return (hidden[1:], hidden[-1].reshape(1, N, D),
                cell[1:], cell[-1].reshape(1, N, D))

    # Use the op's actual (scoped) first input name for feeding/fetching.
    input_blob = op.input[0]
    # Width d * 4: one d-wide slice per gate.
    workspace.FeedBlob(str(input_blob),
                       np.random.randn(t, n, d * 4).astype(np.float32))
    workspace.FeedBlob("hidden_init",
                       np.random.randn(1, n, d).astype(np.float32))
    workspace.FeedBlob("cell_init",
                       np.random.randn(1, n, d).astype(np.float32))
    workspace.FeedBlob(
        "seq_lengths",
        np.random.randint(0, t, size=(n, )).astype(np.int32))
    self.assertReferenceChecks(
        hu.cpu_do,
        op,
        [
            workspace.FetchBlob(name) for name in [
                input_blob, "hidden_init", "cell_init",
                gates["gates_t_w"], gates["gates_t_b"], "seq_lengths"
            ]
        ],
        reference,
    )
    # Checking for input, gates_t_w and gates_t_b gradients
    for param in [0, 3, 4]:
        self.assertGradientChecks(
            hu.cpu_do,
            op,
            [
                workspace.FetchBlob(name) for name in [
                    input_blob, "hidden_init", "cell_init",
                    gates["gates_t_w"], gates["gates_t_b"], "seq_lengths"
                ]
            ],
            param,
            [0],
            threshold=0.01,
        )