# Older handler: before Keras 2.0.9, the LSTM weights and activations lived
# directly on the layer object (compare with the updated handler below).
def _convert_lstm(converter: KerasConverter, k_op: "keras.layers.LSTM"):
    assert k_op.stateful is False, "[KerasConverter] Currently, LSTM.stateful is not supported"
    assert k_op.go_backwards is False, "[KerasConverter] Currently, LSTM.go_backwards is not supported"

    x = converter.get_variable(converter.get_input_tensor(k_op)[0])

    w_input = converter.convert_to_constant_variable(k_op.kernel, OrderCN)
    w_hidden = converter.convert_to_constant_variable(k_op.recurrent_kernel, OrderCN)

    if k_op.use_bias:
        b = converter.convert_to_constant_variable(k_op.bias, OrderC)
    else:
        b = None

    y, c = LSTM(None, k_op.use_bias, k_op.return_sequences,
                use_initial_c=False, use_initial_h=False,
                activation=k_op.activation.__name__,
                recurrent_activation=k_op.recurrent_activation.__name__)(x, w_input, w_hidden, b)

    k_outputs = converter.get_output_tensor(k_op)
    converter.set_variable(k_outputs[0], y)
    if k_op.return_state:
        # The final hidden state is registered as None: it is not emitted as a
        # separate output by the WebDNN LSTM operator in this configuration.
        converter.set_variable(k_outputs[1], None)
        converter.set_variable(k_outputs[2], c)
def _convert_lstm(converter: KerasConverter, k_op: "keras.layers.LSTM"):
    assert k_op.stateful is False, "[KerasConverter] Currently, LSTM.stateful is not supported"
    assert k_op.go_backwards is False, "[KerasConverter] Currently, LSTM.go_backwards is not supported"

    # Structure of LSTM layer was changed in v2.0.9 (https://github.com/fchollet/keras/pull/7943):
    # the weights and activations moved onto an inner LSTMCell.
    if "2.0.9" <= keras.__version__:
        cell = k_op.cell  # type: keras.layers.LSTMCell
    else:
        cell = k_op  # type: keras.layers.LSTM

    x = converter.get_variable(converter.get_input_tensor(k_op)[0])
    x.order.unify(OrderNTC)

    w_input = converter.convert_to_constant_variable(cell.kernel, OrderCN)
    w_hidden = converter.convert_to_constant_variable(cell.recurrent_kernel, OrderCN)

    if k_op.use_bias:
        b = converter.convert_to_constant_variable(cell.bias, OrderC)
    else:
        b = None

    y, c = LSTM(None, cell.use_bias, k_op.return_sequences,
                use_initial_c=False, use_initial_h=False,
                activation=cell.activation.__name__,
                recurrent_activation=cell.recurrent_activation.__name__)(x, w_input, w_hidden, b)

    k_outputs = converter.get_output_tensor(k_op)
    converter.set_variable(k_outputs[0], y)
    if k_op.return_state:
        converter.set_variable(k_outputs[1], None)
        converter.set_variable(k_outputs[2], c)
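# Note on the version check above: `"2.0.9" <= keras.__version__` compares the
# strings lexicographically, which is safe only because Keras released no
# "2.0.10" (2.0.9 was followed by 2.1.0). A sketch of a more robust check,
# for illustration only; this is not what the converter actually ships:
def _keras_has_lstm_cell():
    from distutils.version import LooseVersion
    return LooseVersion(keras.__version__) >= LooseVersion("2.0.9")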
def generate_graph_model2(caption_net, hidden_num):
    # inputs
    var_input_img = Variable([1, 1, hidden_num], OrderNTC)
    var_input_word = Variable([1, 1], OrderNT)
    var_switch_img = Variable([1, 1, hidden_num], OrderNTC)
    var_switch_word = Variable([1, 1, hidden_num], OrderNTC)
    var_last_h = Variable([1, hidden_num], OrderNC)
    var_last_c = Variable([1, hidden_num], OrderNC)

    # prepare for lstm
    var_emb_word, = Embedding(None)(var_input_word,
                                    ConstantVariable(caption_net.word_vec.W.data, OrderCN))  # OrderNTC
    var_lstm_input = (var_emb_word * var_switch_word) + (var_input_img * var_switch_img)

    # lstm
    lstm_opr = LSTM(None,
                    use_bias=True,
                    return_sequences=False,
                    activation="tanh",
                    recurrent_activation="sigmoid",
                    use_initial_h=True,
                    use_initial_c=True)
    w_input = _convert_lstm_to_webdnn_order(caption_net.lstm.upward.W.data.T)
    w_hidden = _convert_lstm_to_webdnn_order(caption_net.lstm.lateral.W.data.T)
    b = _convert_lstm_to_webdnn_order(caption_net.lstm.upward.b.data[None, :])[0]

    var_lstm_h, var_lstm_c = lstm_opr(x=var_lstm_input,
                                      w_input=ConstantVariable(w_input, OrderCN),
                                      w_hidden=ConstantVariable(w_hidden, OrderCN),
                                      b=ConstantVariable(b, OrderC),
                                      initial_h=var_last_h,
                                      initial_c=var_last_c)

    # word probability
    var_word_score, = Linear(None)(var_lstm_h,
                                   ConstantVariable(caption_net.out_word.W.data.T, OrderCN))
    var_word_score_biased, = AxiswiseBias(None, axis=Axis.C)(var_word_score,
                                                             ConstantVariable(caption_net.out_word.b.data, OrderC))
    var_word_prob, = Softmax(None, axis=Axis.C)(var_word_score_biased)

    return Graph([var_input_img, var_input_word, var_switch_img,
                  var_switch_word, var_last_h, var_last_c],
                 [var_word_prob, var_lstm_h, var_lstm_c])
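# `_convert_lstm_to_webdnn_order` is used above but not defined in this
# excerpt. Below is a hypothetical sketch of what it could look like, assuming
# Chainer's LSTM packs the four gates interleaved per unit in the order
# (a, i, f, o) along the output axis, while WebDNN's LSTM operator expects the
# Keras-style contiguous block layout (i, f, c, o); the actual helper may differ.
import numpy as np


def _convert_lstm_to_webdnn_order(w):
    in_size, four_units = w.shape
    units = four_units // 4
    # Chainer layout along the last axis: [a0, i0, f0, o0, a1, i1, f1, o1, ...]
    gates = w.reshape(in_size, units, 4)
    a, i, f, o = (gates[:, :, j] for j in range(4))
    # WebDNN/Keras layout: contiguous blocks [i | f | c(=a) | o]
    return np.concatenate([i, f, a, o], axis=1)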
def test_t_is_10_nonzero_c_sequence_output():
    np.random.seed(2)
    N = 1
    T = 10
    C1 = 128
    C2 = 64
    vx = np.random.normal(size=(N, T, C1)).astype(np.float32)
    vw_input = np.random.normal(size=(C1, C2 * 4)).astype(np.float32)
    vw_hidden = np.random.normal(size=(C2, C2 * 4)).astype(np.float32)
    vb = np.random.normal(size=(C2 * 4,)).astype(np.float32)
    vc_in = np.random.normal(size=(N, C2)).astype(np.float32)
    vc_out = vc_in.copy()
    vh_in = np.random.normal(size=(N, C2)).astype(np.float32)
    vh = vh_in

    vw_input_c = _convert_to_chainer_order(vw_input)
    vw_hidden_c = _convert_to_chainer_order(vw_hidden)
    vb_c = _convert_to_chainer_order(vb[None, :])

    # Reference implementation: unroll the recurrence step by step with
    # chainer.functions.lstm / chainer.functions.linear.
    vh_sequence = []
    for i in range(T):
        vc_out, vh = lstm(vc_out, linear(vx[:, i, :], vw_input_c.T) + linear(vh, vw_hidden_c.T) + vb_c)
        vh_sequence.append(vh.data)
    vh = np.array(vh_sequence).transpose((1, 0, 2))  # TNC -> NTC
    vc_out = vc_out.data

    x = Variable(vx.shape, order=OrderNTC)
    c_in = ConstantVariable(vc_in, order=OrderNC)
    h_in = ConstantVariable(vh_in, order=OrderNC)
    w_input = ConstantVariable(vw_input, order=OrderCN)
    w_hidden = ConstantVariable(vw_hidden, order=OrderCN)
    b = ConstantVariable(vb, order=OrderC)
    y, c_out = LSTM(None, return_sequences=True, use_bias=True,
                    use_initial_c=True, use_initial_h=True,
                    activation="tanh",
                    recurrent_activation="sigmoid")(x, w_input, w_hidden, b,
                                                    initial_c=c_in, initial_h=h_in)

    generate_kernel_test_case(
        description="LSTM t=10 initial_c,initial_h=nonzero sequence_out",
        backend=["webassembly", "webgpu"],
        graph=Graph([x], [y, c_out]),
        inputs={x: vx},
        expected={y: vh, c_out: vc_out},
        EPS=1e-3,
        ABS_EPS=1e-7
    )
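# `_convert_to_chainer_order` (used in the test above) is likewise not defined
# in this excerpt. A hypothetical sketch of the inverse conversion, under the
# same gate-layout assumptions as `_convert_lstm_to_webdnn_order`: from the
# WebDNN/Keras block layout (i, f, c, o) to the per-unit interleaved
# (a, i, f, o) layout that chainer.functions.lstm expects.
def _convert_to_chainer_order(w):
    in_size, four_units = w.shape
    units = four_units // 4
    i, f, c, o = np.split(w, 4, axis=1)           # four blocks of width `units`
    interleaved = np.stack([c, i, f, o], axis=2)  # (in_size, units, 4); c plays the role of a
    return interleaved.reshape(in_size, four_units)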