def deep_rnn_model(input_dim, units, recur_layers, output_dim=29):
    """Build a deep recurrent network for speech.

    Stacks ``recur_layers`` GRU layers, each followed by batch
    normalization, then applies a time-distributed dense layer and a
    softmax activation.

    Args:
        input_dim: Size of the last axis of the acoustic input; the input is
            declared with shape ``(None, input_dim)``.
        units: Number of units in every GRU layer.
        recur_layers: Number of GRU + batch-normalization pairs to stack.
            Must be at least 1.
        output_dim: Number of units of the final dense layer.

    Returns:
        The Keras ``Model``. ``model.output_length`` is set to the identity
        function.

    Raises:
        ValueError: If ``recur_layers`` is smaller than 1.
    """
    # Fail early with a clear message instead of an IndexError further down.
    if recur_layers < 1:
        raise ValueError(
            'recur_layers must be at least 1, got {!r}'.format(recur_layers))

    # Main acoustic input
    input_data = Input(name='the_input', shape=(None, input_dim))

    # Each recurrent layer feeds on the batch-normalized output of the
    # previous one; only the latest output has to be remembered.
    layer_output = input_data
    for i in range(recur_layers):
        rnn_output = GRU(units, return_sequences=True, implementation=2,
                         name="rnn" + str(i))(layer_output)
        layer_output = BatchNormalization(
            name="bn_conv_1d" + str(i))(rnn_output)

    # Time-distributed dense layer on top of the last batch normalization
    time_dense = TimeDistributed(Dense(output_dim))(layer_output)
    # NOTE(review): this sets the attribute on the layer's output, not on the
    # TimeDistributed layer itself - confirm that this is intended.
    time_dense.supports_masking = True

    # Softmax activation layer
    y_pred = Activation('softmax', name='softmax')(time_dense)

    # Specify the model
    model = Model(inputs=input_data, outputs=y_pred)
    model.output_length = lambda x: x
    # summary() writes the table itself, so it is not wrapped in print().
    model.summary()
    return model
def cnn_rnn_model(input_dim, filters, kernel_size, conv_stride,
                  conv_border_mode, units, output_dim=29):
    """Build a recurrent + convolutional network for speech.

    The pipeline is Conv1D -> BatchNormalization -> SimpleRNN ->
    BatchNormalization -> TimeDistributed(Dense) -> softmax.

    Args:
        input_dim: Size of the last axis of the acoustic input; the input is
            declared with shape ``(None, input_dim)``.
        filters: Number of filters of the 1D convolution.
        kernel_size: Kernel size of the 1D convolution.
        conv_stride: Stride of the 1D convolution.
        conv_border_mode: Value passed as ``padding`` to the convolution.
        units: Number of units of the recurrent layer.
        output_dim: Number of units of the final dense layer.

    Returns:
        The Keras ``Model``. ``model.output_length`` maps a length ``x`` to
        ``cnn_output_length(x, kernel_size, conv_border_mode, conv_stride)``.
    """
    # Main acoustic input
    input_data = Input(name='the_input', shape=(None, input_dim))

    # Convolutional layer followed by batch normalization
    conv_1d = Conv1D(filters, kernel_size,
                     strides=conv_stride,
                     padding=conv_border_mode,
                     activation='relu',
                     name='conv1d')(input_data)
    bn_cnn = BatchNormalization(name='bn_conv_1d')(conv_1d)

    # Recurrent layer followed by batch normalization
    simp_rnn = SimpleRNN(units, activation='relu', return_sequences=True,
                         implementation=2, name='rnn')(bn_cnn)
    bn_rnn = BatchNormalization(name='bn_simple_rnn')(simp_rnn)

    # Time-distributed dense layer
    time_dense = TimeDistributed(Dense(output_dim))(bn_rnn)
    # NOTE(review): this sets the attribute on the layer's output, not on the
    # TimeDistributed layer itself - confirm that this is intended.
    time_dense.supports_masking = True

    # Softmax activation layer
    y_pred = Activation('softmax', name='softmax')(time_dense)

    # Specify the model
    model = Model(inputs=input_data, outputs=y_pred)
    model.output_length = lambda x: cnn_output_length(
        x, kernel_size, conv_border_mode, conv_stride)
    # summary() writes the table itself, so it is not wrapped in print().
    model.summary()
    return model
def rnn_model(input_dim, units, activation, output_dim=29):
    """Build a recurrent network for speech.

    The pipeline is GRU -> BatchNormalization -> TimeDistributed(Dense) ->
    softmax.

    Args:
        input_dim: Size of the last axis of the acoustic input; the input is
            declared with shape ``(None, input_dim)``.
        units: Number of units of the GRU layer.
        activation: Activation passed to the GRU layer.
        output_dim: Number of units of the final dense layer.

    Returns:
        The Keras ``Model``. ``model.output_length`` is set to the identity
        function.
    """
    # Main acoustic input
    input_data = Input(name='the_input', shape=(None, input_dim))

    # Recurrent layer followed by batch normalization
    simp_rnn = GRU(units, activation=activation, return_sequences=True,
                   implementation=2, name='rnn')(input_data)
    bn_cnn = BatchNormalization(name='bn_conv_1d')(simp_rnn)

    # Time-distributed dense layer
    time_dense = TimeDistributed(Dense(output_dim))(bn_cnn)
    # NOTE(review): this sets the attribute on the layer's output, not on the
    # TimeDistributed layer itself - confirm that this is intended.
    time_dense.supports_masking = True

    # Softmax activation layer
    y_pred = Activation('softmax', name='softmax')(time_dense)

    # Specify the model
    model = Model(inputs=input_data, outputs=y_pred)
    model.output_length = lambda x: x
    # summary() writes the table itself, so it is not wrapped in print().
    model.summary()
    return model
def Seq2Seq(output_dim, output_length, batch_input_shape=None,
            input_shape=None, batch_size=None, input_dim=None,
            input_length=None, hidden_dim=None, depth=1,
            broadcast_state=True, unroll=False, stateful=False,
            inner_broadcast_state=True, teacher_force=False, peek=False,
            dropout=0.):
    """Build a Seq2seq model based on [1] and [2].

    The encoder hidden state can be transferred to the decoder's hidden
    state (``broadcast_state``). In deep models (depth > 1) the hidden state
    can also be propagated throughout the LSTM stack
    (``inner_broadcast_state``).

    ``peek`` switches between the two variants (``peek=False`` for [1],
    ``peek=True`` for [2]). With ``peek=True`` the decoder gets a 'peek' at
    the context vector at every timestep.

    [1] based model:
        Encoder:
            X = Input sequence
            C = LSTM(X); the context vector
        Decoder:
            y(t) = LSTM(s(t-1), y(t-1)); s is the hidden state of the LSTM
                (h and c)
            y(0) = LSTM(s0, C); C is the context vector from the encoder.

    [2] based model:
        Encoder:
            X = Input sequence
            C = LSTM(X); the context vector
        Decoder:
            y(t) = LSTM(s(t-1), y(t-1), C)
            y(0) = LSTM(s0, C, C)
            s is the hidden state of the LSTM (h and c), and C is the
            context vector from the encoder.

    The input shape is taken from the first truthy one of
    ``batch_input_shape``, ``input_shape`` and ``input_dim``.

    Arguments:
        output_dim: Required output dimension.
        output_length: Length of the required output sequence.
        batch_input_shape: Full input shape, batch axis included.
        input_shape: Input shape without the batch axis (any sequence);
            ``batch_size`` is prepended to it.
        batch_size: Batch axis used together with ``input_shape`` or
            ``input_dim``.
        input_dim: Size of the last input axis; combined with ``batch_size``
            and ``input_length``.
        input_length: Length of the input sequence; ``None`` is used when
            it is falsy.
        hidden_dim: The dimension of the internal representations of the
            model. Defaults to ``output_dim``.
        depth: Used to create a deep Seq2seq model. For example, if
            depth = 3, there will be 3 LSTMs on the encoding side and 3 LSTMs
            on the decoding side. A tuple such as (4, 5) adds 4 LSTMs to the
            encoding side and 5 LSTMs to the decoding side.
        broadcast_state: Specifies whether the hidden state from the encoder
            should be transferred to the decoder.
        unroll: Forwarded to both ``RecurrentSequential`` containers.
        stateful: Forwarded to both ``RecurrentSequential`` containers.
        inner_broadcast_state: Specifies whether hidden states should be
            propagated throughout the LSTM stack in deep models.
        teacher_force: When true, a second model input with batch shape
            ``(shape[0], output_length, output_dim)`` is created and handed
            to the decoder as ``ground_truth``.
        peek: Specifies if the decoder should be able to peek at the context
            vector at every timestep.
        dropout: Dropout probability in between layers.

    Returns:
        A Keras ``Model`` mapping the input(s) to the decoded sequence. The
        encoder and decoder containers are attached as ``model.encoder`` and
        ``model.decoder``.

    Raises:
        TypeError: If none of ``batch_input_shape``, ``input_shape`` and
            ``input_dim`` is given.
    """
    if isinstance(depth, int):
        depth = (depth, depth)

    # Resolve the full batch shape from whichever description was supplied.
    if batch_input_shape:
        shape = batch_input_shape
    elif input_shape:
        # tuple() lets callers pass a list as well as a tuple.
        shape = (batch_size,) + tuple(input_shape)
    elif input_dim:
        shape = (batch_size, input_length if input_length else None,
                 input_dim)
    else:
        raise TypeError(
            'Seq2Seq needs one of batch_input_shape, input_shape or '
            'input_dim to determine the input shape')

    if hidden_dim is None:
        hidden_dim = output_dim

    encoder = RecurrentSequential(readout=True,
                                  state_sync=inner_broadcast_state,
                                  unroll=unroll,
                                  stateful=stateful,
                                  return_states=broadcast_state)
    for _ in range(depth[0]):
        encoder.add(
            LSTMCell(hidden_dim, batch_input_shape=(shape[0], hidden_dim)))
        encoder.add(Dropout(dropout))

    dense1 = TimeDistributed(Dense(hidden_dim))
    dense1.supports_masking = True
    dense2 = Dense(output_dim)

    decoder = RecurrentSequential(readout='add' if peek else 'readout_only',
                                  state_sync=inner_broadcast_state,
                                  decode=True,
                                  output_length=output_length,
                                  unroll=unroll,
                                  stateful=stateful,
                                  teacher_force=teacher_force)
    for _ in range(depth[1]):
        decoder.add(Dropout(dropout, batch_input_shape=(shape[0], output_dim)))
        decoder.add(
            LSTMDecoderCell(output_dim=output_dim,
                            hidden_dim=hidden_dim,
                            batch_input_shape=(shape[0], output_dim)))

    _input = Input(batch_shape=shape)
    _input._keras_history[0].supports_masking = True

    encoded_seq = encoder(dense1(_input))
    if broadcast_state:
        # With return_states enabled the encoder yields a list: the output
        # first and the states at the end.
        assert type(encoded_seq) is list
        states = encoded_seq[-2:]
        encoded_seq = encoded_seq[0]
    else:
        states = None
    encoded_seq = dense2(encoded_seq)

    inputs = [_input]
    ground_truth = None
    if teacher_force:
        truth_tensor = Input(batch_shape=(shape[0], output_length, output_dim))
        truth_tensor._keras_history[0].supports_masking = True
        inputs += [truth_tensor]
        ground_truth = truth_tensor

    decoded_seq = decoder(encoded_seq,
                          ground_truth=ground_truth,
                          initial_readout=encoded_seq,
                          initial_state=states)

    model = Model(inputs, decoded_seq)
    model.encoder = encoder
    model.decoder = decoder
    return model
def Seq2Seq(output_dim, output_length, hidden_dim=None, depth=1,
            broadcast_state=True, inner_broadcast_state=True,
            teacher_force=False, peek=False, dropout=0., **kwargs):
    """Build a Seq2seq model based on [1] and [2].

    The encoder hidden state can be transferred to the decoder's hidden
    state (``broadcast_state``). In deep models (depth > 1) the hidden state
    can also be propagated throughout the LSTM stack
    (``inner_broadcast_state``).

    ``peek`` switches between the two variants (``peek=False`` for [1],
    ``peek=True`` for [2]). With ``peek=True`` the decoder gets a 'peek' at
    the context vector at every timestep.

    [1] based model:
        Encoder:
            X = Input sequence
            C = LSTM(X); the context vector
        Decoder:
            y(t) = LSTM(s(t-1), y(t-1)); s is the hidden state of the LSTM
                (h and c)
            y(0) = LSTM(s0, C); C is the context vector from the encoder.

    [2] based model:
        Encoder:
            X = Input sequence
            C = LSTM(X); the context vector
        Decoder:
            y(t) = LSTM(s(t-1), y(t-1), C)
            y(0) = LSTM(s0, C, C)
            s is the hidden state of the LSTM (h and c), and C is the
            context vector from the encoder.

    Arguments:
        output_dim: Required output dimension.
        output_length: Length of the required output sequence.
        hidden_dim: The dimension of the internal representations of the
            model. Falls back to ``output_dim`` when falsy.
        depth: Used to create a deep Seq2seq model. For example, if
            depth = 3, there will be 3 LSTMs on the encoding side and 3 LSTMs
            on the decoding side. A pair such as (4, 5) adds 4 LSTMs to the
            encoding side and 5 LSTMs to the decoding side.
        broadcast_state: Specifies whether the hidden state from the encoder
            should be transferred to the decoder.
        inner_broadcast_state: Specifies whether hidden states should be
            propagated throughout the LSTM stack in deep models.
        teacher_force: When true, a second model input with batch shape
            ``(shape[0], output_length, output_dim)`` is created and handed
            to the decoder as ``ground_truth``.
        peek: Specifies if the decoder should be able to peek at the context
            vector at every timestep.
        dropout: Dropout probability in between layers.
        **kwargs: The input shape is read from the first present key of
            ``batch_input_shape``, ``input_shape`` and ``input_dim`` (the
            latter optionally with ``input_length``). ``unroll`` and
            ``stateful`` (both default ``False``) are forwarded to the two
            ``RecurrentContainer`` objects. Whatever remains is passed to
            every ``LSTMCell`` and ``LSTMDecoderCell``.

    Returns:
        A Keras ``Model`` mapping the input(s) to the decoded sequence. The
        encoder and decoder containers are attached as ``model.encoder`` and
        ``model.decoder``.

    Raises:
        TypeError: If ``kwargs`` holds none of ``batch_input_shape``,
            ``input_shape`` and ``input_dim``.
    """
    if isinstance(depth, int):
        depth = [depth, depth]

    # Consume the shape description; only the keys of the matching branch
    # are removed from kwargs, the rest is forwarded to the cells below.
    if 'batch_input_shape' in kwargs:
        shape = kwargs.pop('batch_input_shape')
    elif 'input_shape' in kwargs:
        shape = (None,) + tuple(kwargs.pop('input_shape'))
    elif 'input_dim' in kwargs:
        shape = (None, kwargs.pop('input_length', None),
                 kwargs.pop('input_dim'))
    else:
        # Without this guard `shape` stayed unbound and the failure surfaced
        # later as an UnboundLocalError.
        raise TypeError(
            'Seq2Seq needs one of batch_input_shape, input_shape or '
            'input_dim to determine the input shape')

    unroll = kwargs.pop('unroll', False)
    stateful = kwargs.pop('stateful', False)

    if not hidden_dim:
        hidden_dim = output_dim

    encoder = RecurrentContainer(readout=True,
                                 state_sync=inner_broadcast_state,
                                 input_length=shape[1],
                                 unroll=unroll,
                                 stateful=stateful,
                                 return_states=broadcast_state)
    for _ in range(depth[0]):
        encoder.add(LSTMCell(hidden_dim,
                             batch_input_shape=(shape[0], hidden_dim),
                             **kwargs))
        encoder.add(Dropout(dropout))

    dense1 = TimeDistributed(Dense(hidden_dim))
    dense1.supports_masking = True
    dense2 = Dense(output_dim)

    decoder = RecurrentContainer(readout='add' if peek else 'readout_only',
                                 state_sync=inner_broadcast_state,
                                 output_length=output_length,
                                 unroll=unroll,
                                 stateful=stateful,
                                 decode=True,
                                 input_length=shape[1])
    for _ in range(depth[1]):
        decoder.add(Dropout(dropout, batch_input_shape=(shape[0], output_dim)))
        decoder.add(LSTMDecoderCell(output_dim=output_dim,
                                    hidden_dim=hidden_dim,
                                    batch_input_shape=(shape[0], output_dim),
                                    **kwargs))

    # Named model_input so the builtin input() is not shadowed.
    model_input = Input(batch_shape=shape)
    model_input._keras_history[0].supports_masking = True

    encoded_seq = encoder(dense1(model_input))
    if broadcast_state:
        # The encoder output comes first, the states are the last two items.
        states = encoded_seq[-2:]
        encoded_seq = encoded_seq[0]
    else:
        states = [None] * 2
    encoded_seq = dense2(encoded_seq)

    inputs = [model_input]
    ground_truth = None
    if teacher_force:
        truth_tensor = Input(batch_shape=(shape[0], output_length, output_dim))
        truth_tensor._keras_history[0].supports_masking = True
        inputs += [truth_tensor]
        ground_truth = truth_tensor

    decoded_seq = decoder({'input': encoded_seq,
                           'ground_truth': ground_truth,
                           'initial_readout': encoded_seq,
                           'states': states})

    model = Model(inputs, decoded_seq)
    model.encoder = encoder
    model.decoder = decoder
    return model
def Seq2Seq(output_dim, output_length, lookup_matrix, hidden_dim=None,
            depth=1, broadcast_state=True, inner_broadcast_state=True,
            teacher_force=False, peek=False, dropout=0., **kwargs):
    """Build a Seq2seq model based on [1] and [2] with an embedded input.

    The model input has batch shape ``(shape[0], shape[1])`` and is passed
    through an ``Embedding`` layer initialised from ``lookup_matrix`` before
    it reaches the encoder.

    The encoder hidden state can be transferred to the decoder's hidden
    state (``broadcast_state``). In deep models (depth > 1) the hidden state
    can also be propagated throughout the LSTM stack
    (``inner_broadcast_state``).

    ``peek`` switches between the two variants (``peek=False`` for [1],
    ``peek=True`` for [2]). With ``peek=True`` the decoder gets a 'peek' at
    the context vector at every timestep.

    [1] based model:
        Encoder:
            X = Input sequence
            C = LSTM(X); the context vector
        Decoder:
            y(t) = LSTM(s(t-1), y(t-1)); s is the hidden state of the LSTM
                (h and c)
            y(0) = LSTM(s0, C); C is the context vector from the encoder.

    [2] based model:
        Encoder:
            X = Input sequence
            C = LSTM(X); the context vector
        Decoder:
            y(t) = LSTM(s(t-1), y(t-1), C)
            y(0) = LSTM(s0, C, C)
            s is the hidden state of the LSTM (h and c), and C is the
            context vector from the encoder.

    Arguments:
        output_dim: Required output dimension.
        output_length: Length of the required output sequence.
        lookup_matrix: Matrix whose ``shape[0]`` and ``shape[1]`` become the
            ``input_dim`` and ``output_dim`` of the ``Embedding`` layer and
            which is passed as that layer's ``weights``.
        hidden_dim: The dimension of the internal representations of the
            model. Falls back to ``output_dim`` when falsy.
        depth: Used to create a deep Seq2seq model. For example, if
            depth = 3, there will be 3 LSTMs on the encoding side and 3 LSTMs
            on the decoding side. A pair such as (4, 5) adds 4 LSTMs to the
            encoding side and 5 LSTMs to the decoding side.
        broadcast_state: Specifies whether the hidden state from the encoder
            should be transferred to the decoder.
        inner_broadcast_state: Specifies whether hidden states should be
            propagated throughout the LSTM stack in deep models.
        teacher_force: When true, a second model input with batch shape
            ``(shape[0], output_length, output_dim)`` is created and handed
            to the decoder as ``ground_truth``.
        peek: Specifies if the decoder should be able to peek at the context
            vector at every timestep.
        dropout: Dropout probability in between layers.
        **kwargs: The input shape is read from the first present key of
            ``batch_input_shape``, ``input_shape`` and ``input_dim`` (the
            latter optionally with ``input_length``). ``unroll`` and
            ``stateful`` (both default ``False``) are forwarded to the two
            ``RecurrentContainer`` objects. Whatever remains is passed to
            every ``LSTMCell`` and ``LSTMDecoderCell``.

    Returns:
        A Keras ``Model`` mapping the input(s) to the decoded sequence. The
        encoder and decoder containers are attached as ``model.encoder`` and
        ``model.decoder``.

    Raises:
        TypeError: If ``kwargs`` holds none of ``batch_input_shape``,
            ``input_shape`` and ``input_dim``.
    """
    if isinstance(depth, int):
        depth = [depth, depth]

    # Consume the shape description; only the keys of the matching branch
    # are removed from kwargs, the rest is forwarded to the cells below.
    if 'batch_input_shape' in kwargs:
        shape = kwargs.pop('batch_input_shape')
    elif 'input_shape' in kwargs:
        shape = (None,) + tuple(kwargs.pop('input_shape'))
    elif 'input_dim' in kwargs:
        shape = (None, kwargs.pop('input_length', None),
                 kwargs.pop('input_dim'))
    else:
        # Without this guard `shape` stayed unbound and the failure surfaced
        # later as an UnboundLocalError.
        raise TypeError(
            'Seq2Seq needs one of batch_input_shape, input_shape or '
            'input_dim to determine the input shape')

    unroll = kwargs.pop('unroll', False)
    stateful = kwargs.pop('stateful', False)

    if not hidden_dim:
        hidden_dim = output_dim

    encoder = RecurrentContainer(readout=True,
                                 state_sync=inner_broadcast_state,
                                 input_length=shape[1],
                                 unroll=unroll,
                                 stateful=stateful,
                                 return_states=broadcast_state)
    for _ in range(depth[0]):
        encoder.add(LSTMCell(hidden_dim,
                             batch_input_shape=(shape[0], hidden_dim),
                             **kwargs))
        encoder.add(Dropout(dropout))

    dense1 = TimeDistributed(Dense(hidden_dim))
    dense1.supports_masking = True
    dense2 = Dense(output_dim)

    decoder = RecurrentContainer(readout='add' if peek else 'readout_only',
                                 state_sync=inner_broadcast_state,
                                 output_length=output_length,
                                 unroll=unroll,
                                 stateful=stateful,
                                 decode=True,
                                 input_length=shape[1])
    for _ in range(depth[1]):
        decoder.add(Dropout(dropout, batch_input_shape=(shape[0], output_dim)))
        decoder.add(LSTMDecoderCell(output_dim=output_dim,
                                    hidden_dim=hidden_dim,
                                    batch_input_shape=(shape[0], output_dim),
                                    **kwargs))

    # Only the first two entries of `shape` describe the model input; the
    # last axis is produced by the embedding.
    # Named model_input so the builtin input() is not shadowed.
    model_input = Input(batch_shape=(shape[0], shape[1]))
    # Debug output. Single-argument print(...) behaves the same as a
    # statement (Python 2) and as a function (Python 3).
    print(model_input.shape)
    embedded_input = Embedding(input_dim=lookup_matrix.shape[0],
                               output_dim=lookup_matrix.shape[1],
                               weights=[lookup_matrix])(model_input)
    print(embedded_input.shape)
    model_input._keras_history[0].supports_masking = True

    encoded_seq = dense1(embedded_input)
    encoded_seq = encoder(encoded_seq)
    print(encoded_seq)
    if broadcast_state:
        # The encoder output comes first, the states are the last two items.
        states = encoded_seq[-2:]
        encoded_seq = encoded_seq[0]
    else:
        states = [None] * 2
    encoded_seq = dense2(encoded_seq)

    inputs = [model_input]
    ground_truth = None
    if teacher_force:
        truth_tensor = Input(batch_shape=(shape[0], output_length, output_dim))
        truth_tensor._keras_history[0].supports_masking = True
        inputs += [truth_tensor]
        ground_truth = truth_tensor

    decoded_seq = decoder({'input': encoded_seq,
                           'ground_truth': ground_truth,
                           'initial_readout': encoded_seq,
                           'states': states})

    model = Model(inputs, decoded_seq)
    model.encoder = encoder
    model.decoder = decoder

    print("==========Input=========")
    print(model.input)
    # This banner used to repeat "Input" although the output is printed.
    print("==========Output=========")
    print(model.output)
    return model
def Seq2Seq(output_dim, output_length, batch_input_shape=None,
            input_shape=None, batch_size=None, input_dim=None,
            input_length=None, hidden_dim=None, depth=1,
            broadcast_state=True, unroll=False, stateful=False,
            inner_broadcast_state=True, teacher_force=False, peek=False,
            dropout=0.):
    """Build a Seq2seq model based on [1] and [2].

    The encoder hidden state can be transferred to the decoder's hidden
    state (``broadcast_state``). In deep models (depth > 1) the hidden state
    can also be propagated throughout the LSTM stack
    (``inner_broadcast_state``).

    ``peek`` switches between the two variants (``peek=False`` for [1],
    ``peek=True`` for [2]). With ``peek=True`` the decoder gets a 'peek' at
    the context vector at every timestep.

    [1] based model:
        Encoder:
            X = Input sequence
            C = LSTM(X); the context vector
        Decoder:
            y(t) = LSTM(s(t-1), y(t-1)); s is the hidden state of the LSTM
                (h and c)
            y(0) = LSTM(s0, C); C is the context vector from the encoder.

    [2] based model:
        Encoder:
            X = Input sequence
            C = LSTM(X); the context vector
        Decoder:
            y(t) = LSTM(s(t-1), y(t-1), C)
            y(0) = LSTM(s0, C, C)
            s is the hidden state of the LSTM (h and c), and C is the
            context vector from the encoder.

    The input shape is taken from the first truthy one of
    ``batch_input_shape``, ``input_shape`` and ``input_dim``.

    Arguments:
        output_dim: Required output dimension.
        output_length: Length of the required output sequence.
        batch_input_shape: Full input shape, batch axis included.
        input_shape: Input shape without the batch axis (any sequence);
            ``batch_size`` is prepended to it.
        batch_size: Batch axis used together with ``input_shape`` or
            ``input_dim``.
        input_dim: Size of the last input axis; combined with ``batch_size``
            and ``input_length``.
        input_length: Length of the input sequence; ``None`` is used when
            it is falsy.
        hidden_dim: The dimension of the internal representations of the
            model. Defaults to ``output_dim``.
        depth: Used to create a deep Seq2seq model. For example, if
            depth = 3, there will be 3 LSTMs on the encoding side and 3 LSTMs
            on the decoding side. A tuple such as (4, 5) adds 4 LSTMs to the
            encoding side and 5 LSTMs to the decoding side.
        broadcast_state: Specifies whether the hidden state from the encoder
            should be transferred to the decoder.
        unroll: Forwarded to both ``RecurrentSequential`` containers.
        stateful: Forwarded to both ``RecurrentSequential`` containers.
        inner_broadcast_state: Specifies whether hidden states should be
            propagated throughout the LSTM stack in deep models.
        teacher_force: When true, a second model input with batch shape
            ``(shape[0], output_length, output_dim)`` is created and handed
            to the decoder as ``ground_truth``.
        peek: Specifies if the decoder should be able to peek at the context
            vector at every timestep.
        dropout: Dropout probability in between layers.

    Returns:
        A Keras ``Model`` mapping the input(s) to the decoded sequence. The
        encoder and decoder containers are attached as ``model.encoder`` and
        ``model.decoder``.

    Raises:
        TypeError: If none of ``batch_input_shape``, ``input_shape`` and
            ``input_dim`` is given.
    """
    if isinstance(depth, int):
        depth = (depth, depth)

    # Resolve the batch input shape, laid out as
    # (batch_size, timesteps, data_dim).
    if batch_input_shape:
        shape = batch_input_shape
    elif input_shape:
        # tuple() lets callers pass a list as well as a tuple.
        shape = (batch_size,) + tuple(input_shape)
    elif input_dim:
        shape = (batch_size, input_length if input_length else None,
                 input_dim)
    else:
        raise TypeError(
            'Seq2Seq needs one of batch_input_shape, input_shape or '
            'input_dim to determine the input shape')

    if hidden_dim is None:
        hidden_dim = output_dim

    # Recurrent layers reference: https://keras.io/layers/recurrent/
    # return_states asks the encoder to return its states in addition to the
    # output.
    encoder = RecurrentSequential(readout=True,
                                  state_sync=inner_broadcast_state,
                                  unroll=unroll,
                                  stateful=stateful,
                                  return_states=broadcast_state)
    for _ in range(depth[0]):
        encoder.add(LSTMCell(hidden_dim,
                             batch_input_shape=(shape[0], hidden_dim)))
        encoder.add(Dropout(dropout))

    # TimeDistributed reference: https://keras.io/layers/wrappers/
    dense1 = TimeDistributed(Dense(hidden_dim))
    dense1.supports_masking = True
    dense2 = Dense(output_dim)

    # Readout feeds the output of the RNN from the previous time step back
    # into the current time step.
    decoder = RecurrentSequential(readout='add' if peek else 'readout_only',
                                  state_sync=inner_broadcast_state,
                                  decode=True,
                                  output_length=output_length,
                                  unroll=unroll,
                                  stateful=stateful,
                                  teacher_force=teacher_force)
    for _ in range(depth[1]):
        decoder.add(Dropout(dropout, batch_input_shape=(shape[0], output_dim)))
        decoder.add(LSTMDecoderCell(output_dim=output_dim,
                                    hidden_dim=hidden_dim,
                                    batch_input_shape=(shape[0], output_dim)))

    _input = Input(batch_shape=shape)
    _input._keras_history[0].supports_masking = True

    encoded_seq = encoder(dense1(_input))
    if broadcast_state:
        # With return_states enabled the encoder yields a list: the output
        # first and the states at the end.
        assert type(encoded_seq) is list
        states = encoded_seq[-2:]
        encoded_seq = encoded_seq[0]
    else:
        states = None
    encoded_seq = dense2(encoded_seq)

    inputs = [_input]
    ground_truth = None
    if teacher_force:
        truth_tensor = Input(batch_shape=(shape[0], output_length, output_dim))
        truth_tensor._keras_history[0].supports_masking = True
        inputs += [truth_tensor]
        ground_truth = truth_tensor

    decoded_seq = decoder(encoded_seq,
                          ground_truth=ground_truth,
                          initial_readout=encoded_seq,
                          initial_state=states)

    model = Model(inputs, decoded_seq)
    model.encoder = encoder
    model.decoder = decoder
    return model
def Seq2Seq(output_dim, output_length, batch_input_shape=None,
            input_shape=None, batch_size=None, input_dim=None,
            input_length=None, hidden_dim=None, depth=1,
            broadcast_state=True, unroll=False, stateful=False,
            inner_broadcast_state=True, teacher_force=False, peek=False,
            dropout=0.):
    """Build a Seq2seq model based on [1] and [2] with an extra scalar head.

    Besides the decoded sequence, the returned model outputs a second tensor
    obtained by flattening the decoded sequence and applying
    ``Dense(1, activation='tanh')``.

    The encoder hidden state can be transferred to the decoder's hidden
    state (``broadcast_state``). In deep models (depth > 1) the hidden state
    can also be propagated throughout the LSTM stack
    (``inner_broadcast_state``).

    ``peek`` switches between the two variants (``peek=False`` for [1],
    ``peek=True`` for [2]). With ``peek=True`` the decoder gets a 'peek' at
    the context vector at every timestep.

    [1] based model:
        Encoder:
            X = Input sequence
            C = LSTM(X); the context vector
        Decoder:
            y(t) = LSTM(s(t-1), y(t-1)); s is the hidden state of the LSTM
                (h and c)
            y(0) = LSTM(s0, C); C is the context vector from the encoder.

    [2] based model:
        Encoder:
            X = Input sequence
            C = LSTM(X); the context vector
        Decoder:
            y(t) = LSTM(s(t-1), y(t-1), C)
            y(0) = LSTM(s0, C, C)
            s is the hidden state of the LSTM (h and c), and C is the
            context vector from the encoder.

    The input shape is taken from the first truthy one of
    ``batch_input_shape``, ``input_shape`` and ``input_dim``.

    Arguments:
        output_dim: Required output dimension.
        output_length: Length of the required output sequence.
        batch_input_shape: Full input shape, batch axis included.
        input_shape: Input shape without the batch axis (any sequence);
            ``batch_size`` is prepended to it.
        batch_size: Batch axis used together with ``input_shape`` or
            ``input_dim``.
        input_dim: Size of the last input axis; combined with ``batch_size``
            and ``input_length``.
        input_length: Length of the input sequence; ``None`` is used when
            it is falsy.
        hidden_dim: The dimension of the internal representations of the
            model. Defaults to ``output_dim``.
        depth: Used to create a deep Seq2seq model. For example, if
            depth = 3, there will be 3 LSTMs on the encoding side and 3 LSTMs
            on the decoding side. A tuple such as (4, 5) adds 4 LSTMs to the
            encoding side and 5 LSTMs to the decoding side.
        broadcast_state: Specifies whether the hidden state from the encoder
            should be transferred to the decoder.
        unroll: Forwarded to both ``RecurrentSequential`` containers.
        stateful: Forwarded to both ``RecurrentSequential`` containers.
        inner_broadcast_state: Specifies whether hidden states should be
            propagated throughout the LSTM stack in deep models.
        teacher_force: When true, a second model input with batch shape
            ``(shape[0], output_length, output_dim)`` is created and handed
            to the decoder as ``ground_truth``.
        peek: Specifies if the decoder should be able to peek at the context
            vector at every timestep.
        dropout: Dropout probability in between layers.

    Returns:
        A Keras ``Model`` with outputs ``[decoded_seq, decoded_vec]``. The
        encoder and decoder containers are attached as ``model.encoder`` and
        ``model.decoder``.

    Raises:
        TypeError: If none of ``batch_input_shape``, ``input_shape`` and
            ``input_dim`` is given.
    """
    # An integer depth gives the encoder and the decoder the same number of
    # layers.
    if isinstance(depth, int):
        depth = (depth, depth)

    if batch_input_shape:
        # The batch input shape is used directly as the model input shape.
        shape = batch_input_shape
    elif input_shape:
        # Otherwise the batch size is prepended to the input shape, e.g.
        # batch size 32 and input shape (768,) give (32, 768). tuple() lets
        # callers pass a list as well as a tuple.
        shape = (batch_size,) + tuple(input_shape)
    elif input_dim:
        # General layout: (batch size, input sequence length, input dim).
        shape = (batch_size, input_length if input_length else None,
                 input_dim)
    else:
        raise TypeError(
            'Seq2Seq needs one of batch_input_shape, input_shape or '
            'input_dim to determine the input shape')

    # Without an explicit hidden_dim the output dimension is used.
    if hidden_dim is None:
        hidden_dim = output_dim

    # Original author's notes on the container arguments (not verified
    # here):
    #   readout    - whether the output is processed additionally; options
    #                include add (True), multiply, average, maximum.
    #   state_sync - whether states are propagated inside the stack.
    #   stateful   - Keras feature that carries cell states across batches;
    #                with stateful=True, pass shuffle=False to fit().
    #   unroll     - Keras feature that unrolls the recurrence; faster but
    #                uses more memory.
    encoder = RecurrentSequential(readout=True,
                                  state_sync=inner_broadcast_state,
                                  unroll=unroll,
                                  stateful=stateful,
                                  return_states=broadcast_state)
    # depth[0] sets the encoder depth.
    for _ in range(depth[0]):
        encoder.add(LSTMCell(hidden_dim,
                             batch_input_shape=(shape[0], hidden_dim)))
        encoder.add(Dropout(dropout))

    # dense1 applies the same Dense layer to every timestep of the input,
    # mapping the last axis to hidden_dim.
    dense1 = TimeDistributed(Dense(hidden_dim))
    dense1.supports_masking = True
    # dense2 maps the encoder output to output_dim before it reaches the
    # decoder.
    dense2 = Dense(output_dim)

    # Original author's note on teacher forcing (not verified here): instead
    # of feeding the previous output back in, the previous ground-truth item
    # is used as the next input.
    decoder = RecurrentSequential(readout='add' if peek else 'readout_only',
                                  state_sync=inner_broadcast_state,
                                  decode=True,
                                  output_length=output_length,
                                  unroll=unroll,
                                  stateful=stateful,
                                  teacher_force=teacher_force)
    # depth[1] sets the decoder depth.
    for _ in range(depth[1]):
        decoder.add(Dropout(dropout, batch_input_shape=(shape[0], output_dim)))
        decoder.add(LSTMDecoderCell(output_dim=output_dim,
                                    hidden_dim=hidden_dim,
                                    batch_input_shape=(shape[0], output_dim)))

    _input = Input(batch_shape=shape)
    _input._keras_history[0].supports_masking = True

    # The input first goes through the time-distributed dense layer and is
    # then encoded.
    encoded_seq = encoder(dense1(_input))

    # Option handling: state broadcasting and teacher forcing.
    if broadcast_state:
        # With return_states enabled the encoder yields a list: the output
        # first and the states at the end.
        assert type(encoded_seq) is list
        states = encoded_seq[-2:]
        encoded_seq = encoded_seq[0]
    else:
        states = None
    encoded_seq = dense2(encoded_seq)

    inputs = [_input]
    ground_truth = None
    if teacher_force:
        truth_tensor = Input(batch_shape=(shape[0], output_length, output_dim))
        truth_tensor._keras_history[0].supports_masking = True
        inputs += [truth_tensor]
        ground_truth = truth_tensor

    # Decode; initial_state carries the encoder states when they are
    # broadcast.
    decoded_seq = decoder(encoded_seq,
                          ground_truth=ground_truth,
                          initial_readout=encoded_seq,
                          initial_state=states)

    # Sequence-only model from the input(s) to the decoded sequence. It is
    # built as in the original but is not the model that gets returned.
    seq2seq_model = Model(inputs, decoded_seq)
    seq2seq_model.encoder = encoder
    seq2seq_model.decoder = decoder

    # Original author's note: the model maps a real-time news sequence to a
    # stock price fluctuation sequence. To also use news from periods when
    # the market is closed, the decoder output sequence is treated as a
    # fluctuation encoding, flattened and reduced to a scalar by a Dense
    # layer.
    decoded_vec = Flatten()(decoded_seq)
    decoded_vec = Dense(1, activation='tanh')(decoded_vec)

    # The returned model has two outputs: the sequence and the scalar.
    seq2vec_model = Model(inputs, [decoded_seq, decoded_vec])
    # Expose the sub-networks on the returned model as well; previously they
    # were only reachable through the discarded sequence-only model.
    seq2vec_model.encoder = encoder
    seq2vec_model.decoder = decoder
    return seq2vec_model
def Seq2Seq(output_dim, output_length, batch_input_shape=None,
            input_shape=None, batch_size=None, input_dim=None,
            input_length=None, hidden_dim=None, depth=1,
            broadcast_state=True, unroll=False, stateful=False,
            inner_broadcast_state=True, teacher_force=False, peek=False,
            dropout=0.):
    """Build an LSTM encoder/decoder sequence-to-sequence model.

    The input goes through ``TimeDistributed(Dense(hidden_dim))``, a stack of
    ``LSTMCell`` + ``Dropout`` pairs (the encoder), ``Dense(output_dim)`` and
    finally a stack of ``Dropout`` + ``LSTMDecoderCell`` pairs (the decoder).

    The input shape is taken from the first truthy one of
    ``batch_input_shape``, ``input_shape`` and ``input_dim``.

    Arguments:
        output_dim: Output dimension of the decoder cells and of the dense
            layer that follows the encoder.
        output_length: Passed to the decoder as ``output_length``.
        batch_input_shape: Full input shape, batch axis included.
        input_shape: Input shape without the batch axis (any sequence);
            ``batch_size`` is prepended to it.
        batch_size: Batch axis used together with ``input_shape`` or
            ``input_dim``.
        input_dim: Size of the last input axis; combined with ``batch_size``
            and ``input_length``.
        input_length: Length of the input sequence; ``None`` is used when
            it is falsy.
        hidden_dim: Dimension of the encoder cells, of the first dense layer
            and of the decoder cells' hidden state. Defaults to
            ``output_dim``.
        depth: Number of encoder and decoder cells. An int is used for both
            sides; a pair gives ``(encoder_depth, decoder_depth)``.
        broadcast_state: Passed to the encoder as ``return_states``. When
            true, the last two items returned by the encoder are handed to
            the decoder as ``initial_state``.
        unroll: Forwarded to both ``RecurrentSequential`` containers.
        stateful: Forwarded to both ``RecurrentSequential`` containers.
        inner_broadcast_state: Passed to both containers as ``state_sync``.
        teacher_force: Forwarded to the decoder. When true, a second model
            input with batch shape ``(shape[0], output_length, output_dim)``
            is created and handed to the decoder as ``ground_truth``.
        peek: Selects the decoder readout mode: ``'add'`` when true,
            ``'readout_only'`` otherwise.
        dropout: Rate of every ``Dropout`` layer.

    Returns:
        A Keras ``Model`` mapping the input(s) to the decoded sequence. The
        encoder and decoder containers are attached as ``model.encoder`` and
        ``model.decoder``.

    Raises:
        TypeError: If none of ``batch_input_shape``, ``input_shape`` and
            ``input_dim`` is given.
    """
    if isinstance(depth, int):
        depth = (depth, depth)

    # Resolve the full batch shape from whichever description was supplied.
    if batch_input_shape:
        shape = batch_input_shape
    elif input_shape:
        # tuple() lets callers pass a list as well as a tuple.
        shape = (batch_size,) + tuple(input_shape)
    elif input_dim:
        shape = (batch_size, input_length if input_length else None,
                 input_dim)
    else:
        raise TypeError(
            'Seq2Seq needs one of batch_input_shape, input_shape or '
            'input_dim to determine the input shape')

    if hidden_dim is None:
        hidden_dim = output_dim

    encoder = RecurrentSequential(readout=True,
                                  state_sync=inner_broadcast_state,
                                  unroll=unroll,
                                  stateful=stateful,
                                  return_states=broadcast_state)
    for _ in range(depth[0]):
        encoder.add(
            LSTMCell(hidden_dim, batch_input_shape=(shape[0], hidden_dim)))
        encoder.add(Dropout(dropout))

    dense1 = TimeDistributed(Dense(hidden_dim))
    dense1.supports_masking = True
    dense2 = Dense(output_dim)

    decoder = RecurrentSequential(readout='add' if peek else 'readout_only',
                                  state_sync=inner_broadcast_state,
                                  decode=True,
                                  output_length=output_length,
                                  unroll=unroll,
                                  stateful=stateful,
                                  teacher_force=teacher_force)
    for _ in range(depth[1]):
        decoder.add(Dropout(dropout, batch_input_shape=(shape[0], output_dim)))
        decoder.add(
            LSTMDecoderCell(output_dim=output_dim,
                            hidden_dim=hidden_dim,
                            batch_input_shape=(shape[0], output_dim)))

    _input = Input(batch_shape=shape)
    _input._keras_history[0].supports_masking = True

    encoded_seq = encoder(dense1(_input))
    if broadcast_state:
        # With return_states enabled the encoder yields a list: the output
        # first and the states at the end.
        assert type(encoded_seq) is list
        states = encoded_seq[-2:]
        encoded_seq = encoded_seq[0]
    else:
        states = None
    encoded_seq = dense2(encoded_seq)

    inputs = [_input]
    ground_truth = None
    if teacher_force:
        truth_tensor = Input(batch_shape=(shape[0], output_length, output_dim))
        truth_tensor._keras_history[0].supports_masking = True
        inputs += [truth_tensor]
        ground_truth = truth_tensor

    decoded_seq = decoder(encoded_seq,
                          ground_truth=ground_truth,
                          initial_readout=encoded_seq,
                          initial_state=states)

    model = Model(inputs, decoded_seq)
    model.encoder = encoder
    model.decoder = decoder
    return model
def SkipThoughtModel(sent_len, vocab_size, embed_dims, output_length,
                     output_dim, dropout=0.4, unroll=False,
                     teacher_force=False):
    """Build a model with one encoder and two decoders ('prev' and 'next').

    The input sentence goes through ``TimeDistributed(Dense(embed_dims))``,
    a single ``LSTMCell`` + ``Dropout`` encoder and ``Dense(embed_dims)``.
    The encoded sequence is then fed to two decoders obtained from
    ``build_decoder``.

    Arguments:
        sent_len: Length of the input sentence axis; also passed to the
            encoder as ``input_length``.
        vocab_size: Size of the last axis of the input.
        embed_dims: Units of the encoder cell and of both dense layers.
        output_length: Second axis of the ground-truth inputs; also passed
            to ``build_decoder`` for the 'prev' decoder.
        output_dim: Last axis of the ground-truth inputs.
        dropout: Rate of the encoder ``Dropout``; also passed to
            ``build_decoder`` for the 'prev' decoder.
        unroll: Forwarded to the encoder and to ``build_decoder`` for the
            'prev' decoder.
        teacher_force: When true, two extra model inputs with batch shape
            ``(None, output_length, output_dim)`` are created and handed to
            the 'prev' and 'next' decoders as ``ground_truth``.

    Returns:
        A Keras ``Model`` with outputs ``[prev_decoded_seq,
        next_decoded_seq]``. The encoder is attached as ``model.encoder`` and
        the two decoders as ``model.decoders``.
    """
    input_sent = Input(shape=(sent_len, vocab_size), dtype=K.floatx())
    input_sent._keras_history[0].supports_masking = True

    encoder = RecurrentContainer(readout=True,
                                 input_length=sent_len,
                                 unroll=unroll,
                                 stateful=False)
    encoder.add(LSTMCell(embed_dims, batch_input_shape=(None, embed_dims)))
    encoder.add(Dropout(dropout))

    dense1 = TimeDistributed(Dense(embed_dims))
    dense1.supports_masking = True
    dense2 = Dense(embed_dims)

    # The sentence tensor is the model input; the builtin input() was
    # referenced here before.
    encoded_seq = dense1(input_sent)
    encoded_seq = encoder(encoded_seq)
    states = [None] * 2
    encoded_seq = dense2(encoded_seq)

    inputs = [input_sent]
    prev_truth = None
    next_truth = None
    if teacher_force:
        prev_truth = Input(batch_shape=(None, output_length, output_dim))
        prev_truth._keras_history[0].supports_masking = True
        next_truth = Input(batch_shape=(None, output_length, output_dim))
        next_truth._keras_history[0].supports_masking = True
        inputs += [prev_truth, next_truth]

    prev_decoder = build_decoder(dropout=dropout,
                                 unroll=unroll,
                                 output_length=output_length)
    # NOTE(review): the 'next' decoder is built without the arguments given
    # to the 'prev' decoder - confirm against build_decoder whether this
    # asymmetry is intended.
    next_decoder = build_decoder()

    prev_decoded_seq = prev_decoder({
        'input': encoded_seq,
        'ground_truth': prev_truth,
        'initial_readout': encoded_seq,
        'states': states
    })
    next_decoded_seq = next_decoder({
        'input': encoded_seq,
        'ground_truth': next_truth,
        'initial_readout': encoded_seq,
        'states': states
    })

    model = Model(inputs, [prev_decoded_seq, next_decoded_seq])
    model.encoder = encoder
    model.decoders = [prev_decoder, next_decoder]
    return model
def SkipThoughtModel_new(output_dim, output_length, batch_input_shape=None,
                         input_shape=None, batch_size=None, input_dim=None,
                         input_length=None, hidden_dim=None, depth=1,
                         broadcast_state=True, unroll=False, stateful=False,
                         inner_broadcast_state=True, teacher_force=False,
                         peek=False, dropout=0.):
    """Build a Seq2seq-style model with one encoder and two decoders.

    The encoder/decoder layout follows the Seq2seq model based on [1] and
    [2], but the encoded sequence is decoded twice, by a 'prev' and a 'next'
    decoder that are configured identically.

    ``peek`` switches between the two variants (``peek=False`` for [1],
    ``peek=True`` for [2]). With ``peek=True`` the decoders get a 'peek' at
    the context vector at every timestep.

    The input shape is taken from the first truthy one of
    ``batch_input_shape``, ``input_shape`` and ``input_dim``.

    Arguments:
        output_dim: Required output dimension.
        output_length: Length of the required output sequence.
        batch_input_shape: Full input shape, batch axis included.
        input_shape: Input shape without the batch axis (any sequence);
            ``batch_size`` is prepended to it.
        batch_size: Batch axis used together with ``input_shape`` or
            ``input_dim``.
        input_dim: Size of the last input axis; combined with ``batch_size``
            and ``input_length``.
        input_length: Length of the input sequence; ``None`` is used when
            it is falsy.
        hidden_dim: The dimension of the internal representations of the
            model. Defaults to ``output_dim``.
        depth: Used to create a deep model. For example, if depth = 3, there
            will be 3 LSTMs on the encoding side and 3 LSTMs in each decoder.
            A tuple such as (4, 5) adds 4 LSTMs to the encoding side and
            5 LSTMs to each decoder.
        broadcast_state: Specifies whether the hidden state from the encoder
            should be transferred to the decoders.
        unroll: Forwarded to all ``RecurrentSequential`` containers.
        stateful: Forwarded to all ``RecurrentSequential`` containers.
        inner_broadcast_state: Specifies whether hidden states should be
            propagated throughout the LSTM stack in deep models.
        teacher_force: When true, two extra model inputs with batch shape
            ``(shape[0], output_length, output_dim)`` are created, in the
            order 'prev' then 'next', and handed to the matching decoder as
            ``ground_truth``.
        peek: Specifies if the decoders should be able to peek at the
            context vector at every timestep.
        dropout: Dropout probability in between layers.

    Returns:
        A Keras ``Model`` with outputs ``[prev_decoded_seq,
        next_decoded_seq]``. The encoder is attached as ``model.encoder`` and
        ``[decoder_prev, decoder_next]`` as ``model.decoder``.

    Raises:
        TypeError: If none of ``batch_input_shape``, ``input_shape`` and
            ``input_dim`` is given.
    """
    if isinstance(depth, int):
        depth = (depth, depth)

    # Resolve the full batch shape from whichever description was supplied.
    if batch_input_shape:
        shape = batch_input_shape
    elif input_shape:
        # tuple() lets callers pass a list as well as a tuple.
        shape = (batch_size,) + tuple(input_shape)
    elif input_dim:
        shape = (batch_size, input_length if input_length else None,
                 input_dim)
    else:
        raise TypeError(
            'SkipThoughtModel_new needs one of batch_input_shape, '
            'input_shape or input_dim to determine the input shape')

    if hidden_dim is None:
        hidden_dim = output_dim

    encoder = RecurrentSequential(readout=True,
                                  state_sync=inner_broadcast_state,
                                  unroll=unroll,
                                  stateful=stateful,
                                  return_states=broadcast_state)
    for _ in range(depth[0]):
        encoder.add(
            LSTMCell(hidden_dim, batch_input_shape=(shape[0], hidden_dim)))
        encoder.add(Dropout(dropout))

    dense1 = TimeDistributed(Dense(hidden_dim))
    dense1.supports_masking = True
    dense2 = Dense(output_dim)

    readout_mode = 'add' if peek else 'readout_only'
    decoder_next = RecurrentSequential(readout=readout_mode,
                                       state_sync=inner_broadcast_state,
                                       decode=True,
                                       output_length=output_length,
                                       unroll=unroll,
                                       stateful=stateful,
                                       teacher_force=teacher_force)
    decoder_prev = RecurrentSequential(readout=readout_mode,
                                       state_sync=inner_broadcast_state,
                                       decode=True,
                                       output_length=output_length,
                                       unroll=unroll,
                                       stateful=stateful,
                                       teacher_force=teacher_force)
    # Both decoders receive the same Dropout + LSTMDecoderCell stack.
    for _ in range(depth[1]):
        for decoder in (decoder_next, decoder_prev):
            decoder.add(
                Dropout(dropout, batch_input_shape=(shape[0], output_dim)))
            decoder.add(
                LSTMDecoderCell(output_dim=output_dim,
                                hidden_dim=hidden_dim,
                                batch_input_shape=(shape[0], output_dim)))

    _input = Input(batch_shape=shape)
    _input._keras_history[0].supports_masking = True

    encoded_seq = encoder(dense1(_input))
    if broadcast_state:
        # With return_states enabled the encoder yields a list: the output
        # first and the states at the end.
        assert type(encoded_seq) is list
        states = encoded_seq[-2:]
        encoded_seq = encoded_seq[0]
    else:
        states = None
    encoded_seq = dense2(encoded_seq)

    inputs = [_input]
    truth_tensor_prev = None
    truth_tensor_next = None
    if teacher_force:
        truth_tensor_next = Input(
            batch_shape=(shape[0], output_length, output_dim))
        truth_tensor_next._keras_history[0].supports_masking = True
        # Same batch axis as every other input; this tensor used None before
        # while its sibling used shape[0].
        truth_tensor_prev = Input(
            batch_shape=(shape[0], output_length, output_dim))
        truth_tensor_prev._keras_history[0].supports_masking = True
        inputs += [truth_tensor_prev, truth_tensor_next]

    prev_decoded_seq = decoder_prev(encoded_seq,
                                    ground_truth=truth_tensor_prev,
                                    initial_readout=encoded_seq,
                                    initial_state=states)
    next_decoded_seq = decoder_next(encoded_seq,
                                    ground_truth=truth_tensor_next,
                                    initial_readout=encoded_seq,
                                    initial_state=states)

    model = Model(inputs, [prev_decoded_seq, next_decoded_seq])
    model.encoder = encoder
    model.decoder = [decoder_prev, decoder_next]
    return model