def create_model(dbn, input_shape, input_var, mask_shape, mask_var,
                 lstm_size=250, win=T.iscalar('theta)'),
                 output_classes=26, w_init_fn=GlorotUniform,
                 use_peepholes=False, use_blstm=True):
    """
    builds a pretrained encoder + delta + (B)LSTM per-timestep classifier

    The input is flattened to 2 dimensions, run through the encoder built by
    ``create_pretrained_encoder``, reshaped back to
    (batch, seq_len, encoder_len), passed through a ``DeltaLayer`` and then a
    recurrent layer. A softmax ``DenseLayer`` is applied to all timesteps.

    :param dbn: tuple of (weights, biases, shapes, nonlinearities) forwarded
        to ``create_pretrained_encoder``
    :param input_shape: input shape; the last entry is used as feature size
    :param input_var: input theano variable
    :param mask_shape: mask shape
    :param mask_var: mask theano variable
    :param lstm_size: number of lstm units, also the width of the final reshape
    :param win: window theano variable passed to ``DeltaLayer``
    :param output_classes: number of output classes
    :param w_init_fn: weight initializer for the gates; either an initializer
        instance or an initializer class (instantiated with no arguments)
    :param use_peepholes: forwarded to ``create_blstm`` / ``create_lstm``
    :param use_blstm: sum a forward and backward lstm when True, otherwise
        use a single lstm from ``create_lstm``
    :return: ``ReshapeLayer`` named 'output' with shape
        (-1, seq_len, output_classes)
    """
    # The default is the initializer class itself, whereas the sibling
    # builders pass an instance. A bare class would be called with the
    # parameter shape and hand back an initializer object instead of values,
    # so instantiate it here.
    if isinstance(w_init_fn, type):
        w_init_fn = w_init_fn()

    weights, biases, shapes, nonlinearities = dbn

    gate_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn,
        b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn,
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None, b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)

    l_in = InputLayer(input_shape, input_var, 'input')
    l_mask = InputLayer(mask_shape, mask_var, 'mask')

    symbolic_batchsize = l_in.input_var.shape[0]
    symbolic_seqlen = l_in.input_var.shape[1]

    # flatten to 2 dimensions so the feed-forward encoder sees one frame per row
    l_reshape1 = ReshapeLayer(l_in, (-1, input_shape[-1]), name='reshape1')
    l_encoder = create_pretrained_encoder(l_reshape1, weights, biases,
                                          shapes, nonlinearities,
                                          ['fc1', 'fc2', 'fc3', 'bottleneck'])
    encoder_len = las.layers.get_output_shape(l_encoder)[-1]
    # restore the (batch, seq_len, features) layout for the recurrent layers
    l_reshape2 = ReshapeLayer(l_encoder,
                              (symbolic_batchsize, symbolic_seqlen, encoder_len),
                              name='reshape2')
    l_delta = DeltaLayer(l_reshape2, win, name='delta')

    if use_blstm:
        l_lstm, l_lstm_back = create_blstm(l_delta, l_mask, lstm_size,
                                           cell_parameters, gate_parameters,
                                           'blstm1', use_peepholes)
        # Combine the forward and backward layer output by summing.
        # Merge layers take in lists of layers to merge as input.
        l_sum1 = ElemwiseSumLayer([l_lstm, l_lstm_back], name='sum1')
        # reshape, flatten to 2 dimensions to run softmax on all timesteps
        l_reshape3 = ReshapeLayer(l_sum1, (-1, lstm_size), name='reshape3')
    else:
        l_lstm = create_lstm(l_delta, l_mask, lstm_size, cell_parameters,
                             gate_parameters, 'lstm', use_peepholes)
        l_reshape3 = ReshapeLayer(l_lstm, (-1, lstm_size), name='reshape3')

    # Feed-forward softmax over the classes, applied to every timestep row.
    l_softmax = DenseLayer(
        l_reshape3, num_units=output_classes,
        nonlinearity=las.nonlinearities.softmax, name='softmax')

    l_out = ReshapeLayer(l_softmax, (-1, symbolic_seqlen, output_classes),
                         name='output')
    return l_out
def create_model(substreams, mask_shape, mask_var, lstm_size=250,
                 output_classes=26, fusiontype='concat',
                 w_init_fn=las.init.Orthogonal(), use_peepholes=True):
    """
    fuses already built substream layers and classifies every timestep

    :param substreams: list of layers to merge with the fusion layer
    :param mask_shape: mask shape
    :param mask_var: mask theano variable; its second dimension is used as
        the sequence length of the output
    :param lstm_size: number of lstm units of the aggregation blstm
    :param output_classes: number of output classes
    :param fusiontype: one of 'adasum', 'sum' or 'concat'
    :param w_init_fn: weight initializer used for the gates
    :param use_peepholes: kept for interface compatibility; it is NOT passed
        to ``create_blstm`` here (presumably that helper's default applies)
    :return: tuple of (output layer, fusion layer)
    :raises ValueError: if ``fusiontype`` is not supported
    """
    # Fail early with a clear error; previously an unknown fusion type left
    # l_fuse unbound and surfaced as an UnboundLocalError further down.
    if fusiontype not in ('adasum', 'sum', 'concat'):
        raise ValueError('Unsupported Fusion Type used!')

    gate_parameters = Gate(W_in=w_init_fn, W_hid=w_init_fn,
                           b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn,
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None, b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)

    l_mask = InputLayer(mask_shape, mask_var, 'mask')
    symbolic_seqlen_raw = l_mask.input_var.shape[1]

    # Merge the substreams. Merge layers take in lists of layers as input.
    if fusiontype == 'adasum':
        l_fuse = AdaptiveElemwiseSumLayer(substreams, name='adasum1')
    elif fusiontype == 'sum':
        l_fuse = ElemwiseSumLayer(substreams, name='sum1')
    else:  # 'concat'
        l_fuse = ConcatLayer(substreams, axis=-1, name='concat')

    f_lstm_agg, b_lstm_agg = create_blstm(l_fuse, l_mask, lstm_size,
                                          cell_parameters, gate_parameters,
                                          'lstm_agg')
    # combine the forward and backward aggregation outputs by summing
    l_sum2 = ElemwiseSumLayer([f_lstm_agg, b_lstm_agg], name='sum2')

    # reshape to (num_examples * seq_len, lstm_size)
    l_reshape3 = ReshapeLayer(l_sum2, (-1, lstm_size), name='reshape3')

    # Feed-forward softmax over the classes, applied to every timestep row.
    l_softmax = DenseLayer(l_reshape3, num_units=output_classes,
                           nonlinearity=las.nonlinearities.softmax,
                           name='softmax')

    l_out = ReshapeLayer(l_softmax,
                         (-1, symbolic_seqlen_raw, output_classes),
                         name='output')
    return l_out, l_fuse
def create_model(input_shape, input_var, mask_shape, mask_var, window,
                 lstm_size=250, output_classes=26,
                 w_init=las.init.GlorotUniform(),
                 use_peepholes=False, use_blstm=True):
    """
    builds a delta + (B)LSTM per-timestep classifier on raw input features

    :param input_shape: input shape
    :param input_var: input theano variable; its second dimension is used as
        the sequence length of the output
    :param mask_shape: mask shape
    :param mask_var: mask theano variable
    :param window: window passed to ``DeltaLayer``
    :param lstm_size: number of lstm units
    :param output_classes: number of output classes
    :param w_init: weight initializer used for the gates
    :param use_peepholes: forwarded to ``create_blstm`` / ``create_lstm``
    :param use_blstm: sum a forward and backward lstm when True, otherwise
        use a single lstm from ``create_lstm``
    :return: ``ReshapeLayer`` named 'output' with shape
        (-1, seq_len, output_classes)
    """
    gates = Gate(W_in=w_init, W_hid=w_init, b=las.init.Constant(0.))
    # W_cell=None: no cell connection; tanh is the conventional LSTM cell
    # nonlinearity.
    cell = Gate(W_in=w_init, W_hid=w_init, W_cell=None,
                b=las.init.Constant(0.), nonlinearity=tanh)

    features = InputLayer(input_shape, input_var, 'input')
    mask = InputLayer(mask_shape, mask_var, name='mask')
    seq_len = features.input_var.shape[1]

    with_deltas = DeltaLayer(features, window, name='delta')

    if use_blstm:
        forward, backward = create_blstm(with_deltas, mask, lstm_size,
                                         cell, gates, 'lstm', use_peepholes)
        recurrent = ElemwiseSumLayer([forward, backward], name='sum')
    else:
        recurrent = create_lstm(with_deltas, mask, lstm_size,
                                cell, gates, 'lstm', use_peepholes)

    # Both branches flatten to (num_examples * seq_len, lstm_size) so the
    # softmax runs on every timestep.
    flat = ReshapeLayer(recurrent, (-1, lstm_size), name='reshape')

    per_step_softmax = DenseLayer(flat, num_units=output_classes,
                                  nonlinearity=las.nonlinearities.softmax,
                                  name='softmax')

    return ReshapeLayer(per_step_softmax, (-1, seq_len, output_classes),
                        name='output')
def create_model_using_pretrained_encoder(weights, biases, input_shape,
                                          input_var, mask_shape, mask_var,
                                          lstm_size=250,
                                          win=T.iscalar('theta'),
                                          output_classes=26,
                                          w_init_fn=las.init.Orthogonal(),
                                          use_peepholes=False,
                                          nonlinearities=rectify):
    """
    builds a pretrained encoder + delta + BLSTM sequence classifier

    Unlike the per-timestep builders, only index -1 on axis 1 (the last
    timestep) of the summed blstm output is classified.

    :param weights: encoder weights forwarded to ``create_pretrained_encoder``
    :param biases: encoder biases forwarded to ``create_pretrained_encoder``
    :param input_shape: input shape; the last entry is used as feature size
    :param input_var: input theano variable
    :param mask_shape: mask shape
    :param mask_var: mask theano variable
    :param lstm_size: number of lstm units
    :param win: window theano variable passed to ``DeltaLayer``
    :param output_classes: number of output classes
    :param w_init_fn: weight initializer used for the gates
    :param use_peepholes: forwarded to ``create_blstm``
    :param nonlinearities: nonlinearity of the three hidden encoder layers;
        the bottleneck layer is always linear
    :return: softmax ``DenseLayer`` named 'output'
    """
    gates = Gate(W_in=w_init_fn, W_hid=w_init_fn, b=las.init.Constant(0.))
    # W_cell=None: no cell connection; tanh is the conventional LSTM cell
    # nonlinearity.
    cell = Gate(W_in=w_init_fn, W_hid=w_init_fn, W_cell=None,
                b=las.init.Constant(0.), nonlinearity=tanh)

    features = InputLayer(input_shape, input_var, 'input')
    mask = InputLayer(mask_shape, mask_var, 'mask')
    batch_size = features.input_var.shape[0]
    seq_len = features.input_var.shape[1]

    # Fixed encoder layout: three hidden layers plus a linear bottleneck.
    layer_sizes = [2000, 1000, 500, 50]
    layer_nonlinearities = [nonlinearities] * 3 + [linear]
    layer_names = ['fc1', 'fc2', 'fc3', 'bottleneck']

    frames = ReshapeLayer(features, (-1, input_shape[-1]), name='reshape1')
    encoded = create_pretrained_encoder(frames, weights, biases,
                                        layer_sizes, layer_nonlinearities,
                                        layer_names)
    encoded_len = las.layers.get_output_shape(encoded)[-1]
    sequences = ReshapeLayer(encoded, (batch_size, seq_len, encoded_len),
                             name='reshape2')
    with_deltas = DeltaLayer(sequences, win, name='delta')

    forward, backward = create_blstm(with_deltas, mask, lstm_size,
                                     cell, gates, 'bstm1', use_peepholes)
    # Merge layers take in lists of layers to merge as input.
    summed = ElemwiseSumLayer([forward, backward], name='sum1')
    last_step = SliceLayer(summed, -1, 1, name='slice1')

    # One softmax prediction per sequence.
    return DenseLayer(last_step, num_units=output_classes,
                      nonlinearity=las.nonlinearities.softmax,
                      name='output')
def create_model(s1_ae, s2_ae, s3_ae, s1_shape, s1_var, s2_shape, s2_var,
                 s3_shape, s3_var, mask_shape, mask_var, lstm_size=250,
                 lstm2_size=250, win=T.iscalar('theta)'), output_classes=26,
                 fusiontype='concat', w_init_fn=las.init.Orthogonal(),
                 use_peepholes=True):
    """
    builds a 3-stream model with dropout and a blstm aggregation layer

    Each stream is encoded with its pretrained encoder, passed through a
    ``DeltaLayer`` and a ``DropoutLayer`` and fed to its own lstm with
    ``lstm_size * 2`` units. The stream lstms are fused, dropped out, and
    aggregated by a blstm with ``lstm2_size`` units whose summed output is
    classified at every timestep.

    :param s1_ae: tuple of (weights, biases, shapes, nonlinearities) for the
        stream 1 encoder; ``s2_ae`` / ``s3_ae`` likewise
    :param s1_shape: stream 1 input shape; ``s2_shape`` / ``s3_shape`` likewise
    :param s1_var: stream 1 theano variable; ``s2_var`` / ``s3_var`` likewise
    :param mask_shape: mask shape
    :param mask_var: mask theano variable
    :param lstm_size: half the number of units of each stream lstm
    :param lstm2_size: number of lstm units of the aggregation blstm
    :param win: window theano variable passed to every ``DeltaLayer``
    :param output_classes: number of output classes
    :param fusiontype: one of 'adasum', 'sum' or 'concat'
    :param w_init_fn: weight initializer used for the gates
    :param use_peepholes: use peepholes for the stream lstms; it is NOT
        passed to ``create_blstm`` (presumably that helper's default applies)
    :return: tuple of (output layer, fusion layer)
    :raises ValueError: if ``fusiontype`` is not supported
    """
    # Fail early with a clear error; previously an unknown fusion type left
    # l_fuse unbound and surfaced as an UnboundLocalError further down.
    if fusiontype not in ('adasum', 'sum', 'concat'):
        raise ValueError('Unsupported Fusion Type used!')

    gate_parameters = Gate(W_in=w_init_fn, W_hid=w_init_fn,
                           b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn,
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None, b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)

    l_mask = InputLayer(mask_shape, mask_var, 'mask')

    streams = (('s1', s1_ae, s1_shape, s1_var),
               ('s2', s2_ae, s2_shape, s2_var),
               ('s3', s3_ae, s3_shape, s3_var))

    # Phase 1: input -> pretrained encoder -> delta -> dropout, per stream.
    stream_features = []
    symbolic_seqlen_s1 = None
    for tag, ae, shape, var in streams:
        weights, biases, shapes, nonlinearities = ae
        l_in = InputLayer(shape, var, tag + '_im')
        symbolic_batchsize = l_in.input_var.shape[0]
        symbolic_seqlen = l_in.input_var.shape[1]
        if symbolic_seqlen_s1 is None:
            # the output layer is shaped with the first stream's length
            symbolic_seqlen_s1 = symbolic_seqlen

        l_reshape1 = ReshapeLayer(l_in, (-1, shape[-1]),
                                  name='reshape1_' + tag)
        l_encoder = create_pretrained_encoder(
            l_reshape1, weights, biases, shapes, nonlinearities,
            ['fc1_' + tag, 'fc2_' + tag, 'fc3_' + tag, 'bottleneck_' + tag])
        encoder_len = las.layers.get_output_shape(l_encoder)[-1]
        l_reshape2 = ReshapeLayer(
            l_encoder, (symbolic_batchsize, symbolic_seqlen, encoder_len),
            name='reshape2_' + tag)
        l_delta = DeltaLayer(l_reshape2, win, name='delta_' + tag)
        l_dropout = DropoutLayer(l_delta, name='dropout_' + tag)
        stream_features.append((tag, l_dropout))

    # Phase 2: one lstm per stream, created in stream order.
    stream_lstms = [
        LSTMLayer(
            l_features, lstm_size * 2, peepholes=use_peepholes,
            # We need to specify a separate input for masks
            mask_input=l_mask,
            # Here, we supply the gate parameters for each gate
            ingate=gate_parameters, forgetgate=gate_parameters,
            cell=cell_parameters, outgate=gate_parameters,
            # We'll learn the initialization and use gradient clipping
            learn_init=True, grad_clipping=5., name='lstm_' + tag)
        for tag, l_features in stream_features]

    # Merge the streams. Merge layers take in lists of layers as input.
    if fusiontype == 'adasum':
        l_fuse = AdaptiveElemwiseSumLayer(stream_lstms, name='adasum1')
    elif fusiontype == 'sum':
        l_fuse = ElemwiseSumLayer(stream_lstms, name='sum1')
    else:  # 'concat'
        l_fuse = ConcatLayer(stream_lstms, axis=-1, name='concat')

    l_fuse_dropout = DropoutLayer(l_fuse, name='concat_dropout')
    f_lstm_agg, b_lstm_agg = create_blstm(l_fuse_dropout, l_mask, lstm2_size,
                                          cell_parameters, gate_parameters,
                                          'lstm_agg')
    # combine the forward and backward aggregation outputs by summing
    l_sum2 = ElemwiseSumLayer([f_lstm_agg, b_lstm_agg], name='sum2')

    # reshape to (num_examples * seq_len, lstm2_size). The aggregation blstm
    # is built with lstm2_size units, so that is the feature width here; the
    # previous lstm_size * 2 only matched when lstm2_size == 2 * lstm_size.
    l_reshape3 = ReshapeLayer(l_sum2, (-1, lstm2_size), name='reshape3')

    # Feed-forward softmax over the classes, applied to every timestep row.
    l_softmax = DenseLayer(l_reshape3, num_units=output_classes,
                           nonlinearity=las.nonlinearities.softmax,
                           name='softmax')

    l_out = ReshapeLayer(l_softmax,
                         (-1, symbolic_seqlen_s1, output_classes),
                         name='output')
    return l_out, l_fuse
def create_model(dbn, input_shape, input_var, mask_shape, mask_var,
                 dct_shape, dct_var, lstm_size=250, win=T.iscalar('theta)'),
                 output_classes=26, fusiontype='sum',
                 w_init_fn=las.init.GlorotUniform(),
                 use_peepholes=False, nonlinearities=rectify):
    """
    builds a 2-stream (pretrained encoder + dct) model with blstm aggregation

    :param dbn: tuple of (weights, biases, shapes, nonlinearities) forwarded
        to ``create_pretrained_encoder``
    :param input_shape: input shape; the last entry is used as feature size
    :param input_var: input theano variable
    :param mask_shape: mask shape
    :param mask_var: mask theano variable
    :param dct_shape: dct stream input shape
    :param dct_var: dct stream theano variable
    :param lstm_size: number of lstm units for every lstm layer
    :param win: window theano variable passed to both ``DeltaLayer`` s
    :param output_classes: number of output classes
    :param fusiontype: one of 'sum', 'adasum' or 'concat'
    :param w_init_fn: weight initializer used for the gates
    :param use_peepholes: use peepholes for the stream lstms; it is NOT
        passed to ``create_blstm`` (presumably that helper's default applies)
    :param nonlinearities: ignored; it is overwritten by the nonlinearities
        unpacked from ``dbn``
    :return: tuple of (output layer, fusion layer)
    :raises ValueError: if ``fusiontype`` is not supported
    """
    # NOTE: this rebinding shadows the ``nonlinearities`` argument.
    weights, biases, shapes, nonlinearities = dbn
    names = ['fc1', 'fc2', 'fc3', 'bottleneck']

    gate_parameters = Gate(W_in=w_init_fn, W_hid=w_init_fn,
                           b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn,
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None, b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)

    l_in = InputLayer(input_shape, input_var, 'input')
    l_mask = InputLayer(mask_shape, mask_var, 'mask')
    l_dct = InputLayer(dct_shape, dct_var, 'dct')
    symbolic_batchsize = l_in.input_var.shape[0]
    symbolic_seqlen = l_in.input_var.shape[1]

    # flatten to 2 dimensions so the feed-forward encoder sees one frame per row
    l_reshape1 = ReshapeLayer(l_in, (-1, input_shape[-1]), name='reshape1')
    l_encoder = create_pretrained_encoder(l_reshape1, weights, biases,
                                          shapes, nonlinearities, names)
    encoder_len = las.layers.get_output_shape(l_encoder)[-1]
    l_reshape2 = ReshapeLayer(
        l_encoder, (symbolic_batchsize, symbolic_seqlen, encoder_len),
        name='reshape2')
    l_delta = DeltaLayer(l_reshape2, win, name='delta')
    l_delta_dct = DeltaLayer(l_dct, win, name='delta_dct')

    l_lstm_bn = LSTMLayer(
        l_delta, lstm_size, peepholes=use_peepholes,
        # We need to specify a separate input for masks
        mask_input=l_mask,
        # Here, we supply the gate parameters for each gate
        ingate=gate_parameters, forgetgate=gate_parameters,
        cell=cell_parameters, outgate=gate_parameters,
        # We'll learn the initialization and use gradient clipping
        learn_init=True, grad_clipping=5., name='lstm_bn')

    l_lstm_dct = LSTMLayer(
        l_delta_dct, lstm_size, peepholes=use_peepholes,
        # We need to specify a separate input for masks
        mask_input=l_mask,
        # Here, we supply the gate parameters for each gate
        ingate=gate_parameters, forgetgate=gate_parameters,
        cell=cell_parameters, outgate=gate_parameters,
        # We'll learn the initialization and use gradient clipping
        learn_init=True, grad_clipping=5., name='lstm_dct')

    # Merge the two streams. Merge layers take in lists of layers as input.
    if fusiontype == 'sum':
        l_fuse = ElemwiseSumLayer([l_lstm_bn, l_lstm_dct], name='sum1')
    elif fusiontype == 'adasum':
        l_fuse = AdaptiveElemwiseSumLayer([l_lstm_bn, l_lstm_dct],
                                          name='adasum')
    elif fusiontype == 'concat':
        l_fuse = ConcatLayer([l_lstm_bn, l_lstm_dct], axis=2, name='concat')
    else:
        # Built-in exceptions take no keyword arguments; the former
        # ValueError(message=...) raised a TypeError instead.
        raise ValueError('Unsupported Fusion Type used!')

    f_lstm_agg, b_lstm_agg = create_blstm(l_fuse, l_mask, lstm_size,
                                          cell_parameters, gate_parameters,
                                          'lstm_agg')
    # combine the forward and backward aggregation outputs by summing
    l_sum2 = ElemwiseSumLayer([f_lstm_agg, b_lstm_agg], name='sum2')

    # reshape to (num_examples * seq_len, lstm_size)
    l_reshape3 = ReshapeLayer(l_sum2, (-1, lstm_size), name='reshape3')

    # Feed-forward softmax over the classes, applied to every timestep row.
    l_softmax = DenseLayer(l_reshape3, num_units=output_classes,
                           nonlinearity=las.nonlinearities.softmax,
                           name='softmax')

    l_out = ReshapeLayer(l_softmax, (-1, symbolic_seqlen, output_classes),
                         name='output')
    return l_out, l_fuse
def create_model(s1_ae, s2_ae, s3_ae, s1_shape, s1_var, s2_shape, s2_var,
                 s3_shape, s3_var, mask_shape, mask_var, lstm_size=250,
                 win=T.iscalar('theta)'), output_classes=26,
                 fusiontype='concat', w_init_fn=las.init.Orthogonal(),
                 use_peepholes=True):
    """
    builds a 3-stream model with a blstm aggregation layer

    Each stream is encoded with its pretrained encoder, passed through a
    ``DeltaLayer`` and fed to its own lstm. The stream lstms are fused and
    aggregated by a blstm whose summed output is classified at every timestep.

    :param s1_ae: tuple of (weights, biases, shapes, nonlinearities) for the
        stream 1 encoder; ``s2_ae`` / ``s3_ae`` likewise
    :param s1_shape: stream 1 input shape; ``s2_shape`` / ``s3_shape`` likewise
    :param s1_var: stream 1 theano variable; ``s2_var`` / ``s3_var`` likewise
    :param mask_shape: mask shape
    :param mask_var: mask theano variable
    :param lstm_size: number of lstm units for every lstm layer
    :param win: window theano variable passed to every ``DeltaLayer``
    :param output_classes: number of output classes
    :param fusiontype: one of 'adasum', 'sum' or 'concat'
    :param w_init_fn: weight initializer used for the gates
    :param use_peepholes: use peepholes for the stream lstms; it is NOT
        passed to ``create_blstm`` (presumably that helper's default applies)
    :return: tuple of (output layer, fusion layer)
    :raises ValueError: if ``fusiontype`` is not supported
    """
    # Fail early with a clear error; previously an unknown fusion type left
    # l_fuse unbound and surfaced as an UnboundLocalError further down.
    if fusiontype not in ('adasum', 'sum', 'concat'):
        raise ValueError('Unsupported Fusion Type used!')

    gate_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn,
        b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn,
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None, b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)

    l_mask = InputLayer(mask_shape, mask_var, 'mask')

    def _stream_lstm(incoming, num_units, name):
        # one masked lstm per stream, all sharing the same gate settings
        return LSTMLayer(
            incoming, num_units, peepholes=use_peepholes,
            # We need to specify a separate input for masks
            mask_input=l_mask,
            # Here, we supply the gate parameters for each gate
            ingate=gate_parameters, forgetgate=gate_parameters,
            cell=cell_parameters, outgate=gate_parameters,
            # We'll learn the initialization and use gradient clipping
            learn_init=True, grad_clipping=5., name=name)

    streams = (('s1', s1_ae, s1_shape, s1_var),
               ('s2', s2_ae, s2_shape, s2_var),
               ('s3', s3_ae, s3_shape, s3_var))

    # Phase 1: input -> pretrained encoder -> delta, per stream.
    deltas = []
    symbolic_seqlen_s1 = None
    for tag, ae, shape, var in streams:
        weights, biases, shapes, nonlinearities = ae
        l_in = InputLayer(shape, var, tag + '_im')
        symbolic_batchsize = l_in.input_var.shape[0]
        symbolic_seqlen = l_in.input_var.shape[1]
        if symbolic_seqlen_s1 is None:
            # the output layer is shaped with the first stream's length
            symbolic_seqlen_s1 = symbolic_seqlen

        l_reshape1 = ReshapeLayer(l_in, (-1, shape[-1]),
                                  name='reshape1_' + tag)
        l_encoder = create_pretrained_encoder(
            l_reshape1, weights, biases, shapes, nonlinearities,
            ['fc1_' + tag, 'fc2_' + tag, 'fc3_' + tag, 'bottleneck_' + tag])
        encoder_len = las.layers.get_output_shape(l_encoder)[-1]
        l_reshape2 = ReshapeLayer(
            l_encoder, (symbolic_batchsize, symbolic_seqlen, encoder_len),
            name='reshape2_' + tag)
        deltas.append(DeltaLayer(l_reshape2, win, name='delta_' + tag))

    l_delta_s1, l_delta_s2, l_delta_s3 = deltas

    # Phase 2: stream lstms, created in stream order. Only the first stream
    # coerces lstm_size with int(); that asymmetry is kept as in the original.
    l_lstm_s1 = _stream_lstm(l_delta_s1, int(lstm_size), 'lstm_s1')
    l_lstm_s2 = _stream_lstm(l_delta_s2, lstm_size, 'lstm_s2')
    l_lstm_s3 = _stream_lstm(l_delta_s3, lstm_size, 'lstm_s3')
    stream_lstms = [l_lstm_s1, l_lstm_s2, l_lstm_s3]

    # Merge the streams. Merge layers take in lists of layers as input.
    if fusiontype == 'adasum':
        l_fuse = AdaptiveElemwiseSumLayer(stream_lstms, name='adasum1')
    elif fusiontype == 'sum':
        l_fuse = ElemwiseSumLayer(stream_lstms, name='sum1')
    else:  # 'concat'
        l_fuse = ConcatLayer(stream_lstms, axis=-1, name='concat')

    f_lstm_agg, b_lstm_agg = create_blstm(l_fuse, l_mask, lstm_size,
                                          cell_parameters, gate_parameters,
                                          'lstm_agg')
    # combine the forward and backward aggregation outputs by summing
    l_sum2 = ElemwiseSumLayer([f_lstm_agg, b_lstm_agg], name='sum2')

    # reshape to (num_examples * seq_len, lstm_size)
    l_reshape3 = ReshapeLayer(l_sum2, (-1, lstm_size), name='reshape3')

    # Feed-forward softmax over the classes, applied to every timestep row.
    l_softmax = DenseLayer(
        l_reshape3, num_units=output_classes,
        nonlinearity=las.nonlinearities.softmax, name='softmax')

    l_out = ReshapeLayer(l_softmax,
                         (-1, symbolic_seqlen_s1, output_classes),
                         name='output')
    return l_out, l_fuse
def create_model(dbn, input_shape, input_var, mask_shape, mask_var,
                 dct_shape, dct_var, lstm_size=250, win=T.iscalar('theta)'),
                 output_classes=26, fusiontype='sum',
                 w_init_fn=las.init.GlorotUniform(),
                 use_peepholes=False, nonlinearities=rectify):
    """
    builds a 2-stream (pretrained encoder + dct) model with blstm aggregation

    :param dbn: tuple of (weights, biases, shapes, nonlinearities) forwarded
        to ``create_pretrained_encoder``
    :param input_shape: input shape; the last entry is used as feature size
    :param input_var: input theano variable
    :param mask_shape: mask shape
    :param mask_var: mask theano variable
    :param dct_shape: dct stream input shape
    :param dct_var: dct stream theano variable
    :param lstm_size: number of lstm units for every lstm layer
    :param win: window theano variable passed to both ``DeltaLayer`` s
    :param output_classes: number of output classes
    :param fusiontype: one of 'sum', 'adasum' or 'concat'
    :param w_init_fn: weight initializer used for the gates
    :param use_peepholes: use peepholes for the stream lstms; it is NOT
        passed to ``create_blstm`` (presumably that helper's default applies)
    :param nonlinearities: ignored; it is overwritten by the nonlinearities
        unpacked from ``dbn``
    :return: tuple of (output layer, fusion layer)
    :raises ValueError: if ``fusiontype`` is not supported
    """
    # NOTE: this rebinding shadows the ``nonlinearities`` argument.
    weights, biases, shapes, nonlinearities = dbn
    names = ['fc1', 'fc2', 'fc3', 'bottleneck']

    gate_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn,
        b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn,
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None, b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)

    l_in = InputLayer(input_shape, input_var, 'input')
    l_mask = InputLayer(mask_shape, mask_var, 'mask')
    l_dct = InputLayer(dct_shape, dct_var, 'dct')
    symbolic_batchsize = l_in.input_var.shape[0]
    symbolic_seqlen = l_in.input_var.shape[1]

    # flatten to 2 dimensions so the feed-forward encoder sees one frame per row
    l_reshape1 = ReshapeLayer(l_in, (-1, input_shape[-1]), name='reshape1')
    l_encoder = create_pretrained_encoder(l_reshape1, weights, biases,
                                          shapes, nonlinearities, names)
    encoder_len = las.layers.get_output_shape(l_encoder)[-1]
    l_reshape2 = ReshapeLayer(
        l_encoder, (symbolic_batchsize, symbolic_seqlen, encoder_len),
        name='reshape2')
    l_delta = DeltaLayer(l_reshape2, win, name='delta')
    l_delta_dct = DeltaLayer(l_dct, win, name='delta_dct')

    l_lstm_bn = LSTMLayer(
        l_delta, lstm_size, peepholes=use_peepholes,
        # We need to specify a separate input for masks
        mask_input=l_mask,
        # Here, we supply the gate parameters for each gate
        ingate=gate_parameters, forgetgate=gate_parameters,
        cell=cell_parameters, outgate=gate_parameters,
        # We'll learn the initialization and use gradient clipping
        learn_init=True, grad_clipping=5., name='lstm_bn')

    l_lstm_dct = LSTMLayer(
        l_delta_dct, lstm_size, peepholes=use_peepholes,
        # We need to specify a separate input for masks
        mask_input=l_mask,
        # Here, we supply the gate parameters for each gate
        ingate=gate_parameters, forgetgate=gate_parameters,
        cell=cell_parameters, outgate=gate_parameters,
        # We'll learn the initialization and use gradient clipping
        learn_init=True, grad_clipping=5., name='lstm_dct')

    # Merge the two streams. Merge layers take in lists of layers as input.
    if fusiontype == 'sum':
        l_fuse = ElemwiseSumLayer([l_lstm_bn, l_lstm_dct], name='sum1')
    elif fusiontype == 'adasum':
        l_fuse = AdaptiveElemwiseSumLayer([l_lstm_bn, l_lstm_dct],
                                          name='adasum')
    elif fusiontype == 'concat':
        l_fuse = ConcatLayer([l_lstm_bn, l_lstm_dct], axis=2, name='concat')
    else:
        # Built-in exceptions take no keyword arguments; the former
        # ValueError(message=...) raised a TypeError instead.
        raise ValueError('Unsupported Fusion Type used!')

    f_lstm_agg, b_lstm_agg = create_blstm(l_fuse, l_mask, lstm_size,
                                          cell_parameters, gate_parameters,
                                          'lstm_agg')
    # combine the forward and backward aggregation outputs by summing
    l_sum2 = ElemwiseSumLayer([f_lstm_agg, b_lstm_agg], name='sum2')

    # reshape to (num_examples * seq_len, lstm_size)
    l_reshape3 = ReshapeLayer(l_sum2, (-1, lstm_size), name='reshape3')

    # Feed-forward softmax over the classes, applied to every timestep row.
    l_softmax = DenseLayer(
        l_reshape3, num_units=output_classes,
        nonlinearity=las.nonlinearities.softmax, name='softmax')

    l_out = ReshapeLayer(l_softmax, (-1, symbolic_seqlen, output_classes),
                         name='output')
    return l_out, l_fuse
def create_pretrained_model(s1_ae, s1_lstm, s2_ae, s2_lstm, s3_ae, s3_lstm,
                            s4_ae, s4_lstm, s5_ae, s5_lstm,
                            s1_shape, s1_var, s2_shape, s2_var,
                            s3_shape, s3_var, s4_shape, s4_var,
                            s5_shape, s5_var, mask_shape, mask_var,
                            lstm_size=250, win=T.iscalar('theta)'),
                            output_classes=26, fusiontype='concat',
                            w_init_fn=las.init.Orthogonal(),
                            use_peepholes=True, use_blstm_substream=False):
    """
    builds a 5-stream model from pretrained encoders and pretrained lstms

    Each stream is encoded with its pretrained encoder, passed through a
    ``DeltaLayer`` and fed to a stream lstm built by
    ``create_pretrained_lstm``. The stream outputs are fused and aggregated
    by a blstm whose summed output is classified at every timestep.

    :param s1_ae: tuple of (weights, biases, shapes, nonlinearities) for the
        stream 1 encoder; ``s2_ae`` ... ``s5_ae`` likewise
    :param s1_lstm: pretrained lstm parameters of stream 1 forwarded to
        ``create_pretrained_lstm``; ``s2_lstm`` ... ``s5_lstm`` likewise
    :param s1_shape: stream 1 input shape; ``s2_shape`` ... ``s5_shape``
        likewise
    :param s1_var: stream 1 theano variable; ``s2_var`` ... ``s5_var`` likewise
    :param mask_shape: mask shape
    :param mask_var: mask theano variable
    :param lstm_size: number of lstm units for every lstm layer
    :param win: window theano variable passed to every ``DeltaLayer``
    :param output_classes: number of output classes
    :param fusiontype: one of 'adasum', 'sum' or 'concat'
    :param w_init_fn: weight initializer used for the gates
    :param use_peepholes: forwarded to ``create_pretrained_lstm``; it is NOT
        passed to ``create_blstm`` (presumably that helper's default applies)
    :param use_blstm_substream: when True every stream uses the sum of a
        pretrained forward ('f_lstm') and backward ('b_lstm') lstm, otherwise
        only the forward lstm
    :return: tuple of (output layer, fusion layer)
    :raises ValueError: if ``fusiontype`` is not supported
    """
    # Fail early with a clear error; previously an unknown fusion type left
    # l_fuse unbound and surfaced as an UnboundLocalError further down.
    if fusiontype not in ('adasum', 'sum', 'concat'):
        raise ValueError('Unsupported Fusion Type used!')

    gate_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn,
        b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn,
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None, b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)

    l_mask = InputLayer(mask_shape, mask_var, 'mask')

    streams = (('s1', s1_ae, s1_lstm, s1_shape, s1_var),
               ('s2', s2_ae, s2_lstm, s2_shape, s2_var),
               ('s3', s3_ae, s3_lstm, s3_shape, s3_var),
               ('s4', s4_ae, s4_lstm, s4_shape, s4_var),
               ('s5', s5_ae, s5_lstm, s5_shape, s5_var))

    # Phase 1: input -> pretrained encoder -> delta, per stream.
    stream_deltas = []
    symbolic_seqlen_s1 = None
    for tag, ae, lstm_params, shape, var in streams:
        weights, biases, shapes, nonlinearities = ae
        l_in = InputLayer(shape, var, tag + '_im')
        symbolic_batchsize = l_in.input_var.shape[0]
        symbolic_seqlen = l_in.input_var.shape[1]
        if symbolic_seqlen_s1 is None:
            # the output layer is shaped with the first stream's length
            symbolic_seqlen_s1 = symbolic_seqlen

        l_reshape1 = ReshapeLayer(l_in, (-1, shape[-1]),
                                  name='reshape1_' + tag)
        l_encoder = create_pretrained_encoder(
            l_reshape1, weights, biases, shapes, nonlinearities,
            ['fc1_' + tag, 'fc2_' + tag, 'fc3_' + tag, 'bottleneck_' + tag])
        encoder_len = las.layers.get_output_shape(l_encoder)[-1]
        l_reshape2 = ReshapeLayer(
            l_encoder, (symbolic_batchsize, symbolic_seqlen, encoder_len),
            name='reshape2_' + tag)
        l_delta = DeltaLayer(l_reshape2, win, name='delta_' + tag)
        stream_deltas.append((tag, lstm_params, l_delta))

    # Phase 2: pretrained stream lstms, created in stream order.
    stream_lstms = []
    for tag, lstm_params, l_delta in stream_deltas:
        f_lstm = create_pretrained_lstm(lstm_params, 'f_lstm', l_delta,
                                        l_mask, lstm_size, cell_parameters,
                                        gate_parameters, 'f_lstm_' + tag,
                                        use_peepholes)
        if not use_blstm_substream:
            stream_lstms.append(f_lstm)
            continue
        b_lstm = create_pretrained_lstm(lstm_params, 'b_lstm', l_delta,
                                        l_mask, lstm_size, cell_parameters,
                                        gate_parameters, 'b_lstm_' + tag,
                                        use_peepholes, backwards=True)
        # combine the forward and backward stream outputs by summing
        stream_lstms.append(
            ElemwiseSumLayer([f_lstm, b_lstm], name='sum_b_lstm_' + tag))

    # Merge the streams. Merge layers take in lists of layers as input.
    if fusiontype == 'adasum':
        l_fuse = AdaptiveElemwiseSumLayer(stream_lstms, name='adasum1')
    elif fusiontype == 'sum':
        l_fuse = ElemwiseSumLayer(stream_lstms, name='sum1')
    else:  # 'concat'
        l_fuse = ConcatLayer(stream_lstms, axis=-1, name='concat')

    f_lstm_agg, b_lstm_agg = create_blstm(l_fuse, l_mask, lstm_size,
                                          cell_parameters, gate_parameters,
                                          'lstm_agg')
    # combine the forward and backward aggregation outputs by summing
    l_sum2 = ElemwiseSumLayer([f_lstm_agg, b_lstm_agg], name='sum2')

    # reshape to (num_examples * seq_len, lstm_size)
    l_reshape3 = ReshapeLayer(l_sum2, (-1, lstm_size), name='reshape3')

    # Feed-forward softmax over the classes, applied to every timestep row.
    l_softmax = DenseLayer(
        l_reshape3, num_units=output_classes,
        nonlinearity=las.nonlinearities.softmax, name='softmax')

    l_out = ReshapeLayer(l_softmax,
                         (-1, symbolic_seqlen_s1, output_classes),
                         name='output')
    return l_out, l_fuse
def load_saved_model(model_path, stream_params, input_shape, input_var,
                     mask_shape, mask_var, lstm_size=250,
                     win=T.iscalar('theta)'), output_classes=26,
                     w_init_fn=GlorotUniform(), use_peepholes=False,
                     use_blstm=True):
    """
    rebuilds the encoder + delta + (B)LSTM network and loads saved parameters
    :param model_path: path to model parameters
    :param stream_params: stream parameters in a tuple of
        ([layer 1 dimension, ..., layer N dimension],
         [layer 1 nonlinearity, ..., layer N nonlinearity])
    :param input_shape: input shape eg: (None, None, 1500)
    :param input_var: input theano variable
    :param mask_shape: mask shape eg: (None, None) if variable lengths
    :param mask_var: mask theano variable
    :param lstm_size: number of lstm units for lstm layer
    :param win: window theano variable
    :param output_classes: number of output classes
    :param w_init_fn: weight initialization function used for initializing model
    :param use_peepholes: use peepholes for lstm layers
    :param use_blstm: sum a forward and backward lstm when True, otherwise
        use a single lstm from ``create_lstm``
    :return: saved model
    """
    layer_dims, layer_nonlinearities = stream_params

    gates = Gate(W_in=w_init_fn, W_hid=w_init_fn, b=las.init.Constant(0.))
    # W_cell=None: no cell connection; tanh is the conventional LSTM cell
    # nonlinearity.
    cell = Gate(W_in=w_init_fn, W_hid=w_init_fn, W_cell=None,
                b=las.init.Constant(0.), nonlinearity=tanh)

    features = InputLayer(input_shape, input_var, 'input')
    mask = InputLayer(mask_shape, mask_var, 'mask')
    batch_size = features.input_var.shape[0]
    seq_len = features.input_var.shape[1]

    # flatten to 2 dimensions for the encoder, then restore the sequence layout
    frames = ReshapeLayer(features, (-1, input_shape[-1]), name='reshape1')
    encoded = create_encoder(frames, layer_dims, layer_nonlinearities,
                             ['fc1', 'fc2', 'fc3', 'bottleneck'])
    encoded_len = las.layers.get_output_shape(encoded)[-1]
    sequences = ReshapeLayer(encoded, (batch_size, seq_len, encoded_len),
                             name='reshape2')
    with_deltas = DeltaLayer(sequences, win, name='delta')

    if use_blstm:
        forward, backward = create_blstm(with_deltas, mask, lstm_size,
                                         cell, gates, 'lstm', use_peepholes)
        # Merge layers take in lists of layers to merge as input.
        recurrent = ElemwiseSumLayer([forward, backward], name='sum1')
    else:
        recurrent = create_lstm(with_deltas, mask, lstm_size,
                                cell, gates, 'lstm', use_peepholes)

    # flatten to 2 dimensions to run softmax on all timesteps
    flat = ReshapeLayer(recurrent, (-1, lstm_size), name='reshape3')

    per_step_softmax = DenseLayer(
        flat, num_units=output_classes,
        nonlinearity=las.nonlinearities.softmax, name='softmax')

    network = ReshapeLayer(per_step_softmax,
                           (-1, seq_len, output_classes), name='output')
    # populate the freshly built network with the stored parameters
    load_model_params(network, model_path)
    return network