Python create_blstm示例，custom.layers.create_blstm Python示例

示例#1

0

显示文件

文件： deltanet_majority_vote.py 项目： redforg/end-to-end-multiview-lipreading

def create_model(dbn, input_shape, input_var, mask_shape, mask_var,
                 lstm_size=250, win=T.iscalar('theta)'),
                 output_classes=26, w_init_fn=GlorotUniform, use_peepholes=False, use_blstm=True):
    """Assemble the pretrained-encoder + delta + (B)LSTM classification network.

    The input is flattened to 2-D (keeping its last dimension), passed through
    the pretrained encoder described by ``dbn``, restored to a 3-D tensor using
    the first two symbolic dimensions of the input variable, fed through a
    ``DeltaLayer`` and then through either a bidirectional LSTM (both
    directions summed element-wise) or a single LSTM.  A softmax dense layer
    is applied to every row of the flattened recurrent output.

    Args:
        dbn: 4-tuple ``(weights, biases, shapes, nonlinearities)`` handed to
            ``create_pretrained_encoder``.
        input_shape: shape of the input layer; its last entry is used as the
            width of the flattened encoder input.
        input_var: variable bound to the input layer.
        mask_shape: shape of the mask input layer.
        mask_var: variable bound to the mask input layer.
        lstm_size: size passed to ``create_blstm`` / ``create_lstm`` and used
            as the width of the flattened recurrent output.
        win: window argument forwarded to ``DeltaLayer``.
        output_classes: number of units of the softmax layer.
        w_init_fn: initializer used for ``W_in`` and ``W_hid`` of every gate.
        use_peepholes: forwarded to ``create_blstm`` / ``create_lstm``.
        use_blstm: build a bidirectional LSTM when true, a single LSTM otherwise.

    Returns:
        The output layer, reshaped to ``(-1, seq_len, output_classes)`` where
        ``seq_len`` is the symbolic second dimension of the input variable.
    """
    enc_weights, enc_biases, enc_shapes, enc_nonlinearities = dbn

    # One spec shared by the gates handed to the recurrent layer builders.
    gate_parameters = Gate(W_in=w_init_fn,
                           W_hid=w_init_fn,
                           b=las.init.Constant(0.))
    # W_cell=None means no cell connection is used; tanh is the conventional
    # LSTM cell nonlinearity.
    cell_parameters = Gate(W_in=w_init_fn,
                           W_hid=w_init_fn,
                           W_cell=None,
                           b=las.init.Constant(0.),
                           nonlinearity=tanh)

    l_in = InputLayer(input_shape, input_var, 'input')
    l_mask = InputLayer(mask_shape, mask_var, 'mask')

    # Symbolic sizes of the first two input dimensions, needed to undo the
    # flattening once the encoder has been applied.
    dim0, dim1 = l_in.input_var.shape[0], l_in.input_var.shape[1]

    l_flat_in = ReshapeLayer(l_in, (-1, input_shape[-1]), name='reshape1')
    encoder_layer_names = ['fc1', 'fc2', 'fc3', 'bottleneck']
    l_encoder = create_pretrained_encoder(l_flat_in, enc_weights, enc_biases,
                                          enc_shapes, enc_nonlinearities,
                                          encoder_layer_names)
    feature_len = las.layers.get_output_shape(l_encoder)[-1]
    l_sequence = ReshapeLayer(l_encoder, (dim0, dim1, feature_len), name='reshape2')
    l_delta = DeltaLayer(l_sequence, win, name='delta')

    if not use_blstm:
        l_recurrent = create_lstm(l_delta, l_mask, lstm_size, cell_parameters,
                                  gate_parameters, 'lstm', use_peepholes)
    else:
        l_forward, l_backward = create_blstm(l_delta, l_mask, lstm_size,
                                             cell_parameters, gate_parameters,
                                             'blstm1', use_peepholes)
        # Merge the two directions by element-wise summation.
        l_recurrent = ElemwiseSumLayer([l_forward, l_backward], name='sum1')

    # Flatten to 2-D so the softmax layer is applied to every timestep.
    l_flat_hidden = ReshapeLayer(l_recurrent, (-1, lstm_size), name='reshape3')
    l_softmax = DenseLayer(l_flat_hidden,
                           num_units=output_classes,
                           nonlinearity=las.nonlinearities.softmax,
                           name='softmax')

    return ReshapeLayer(l_softmax, (-1, dim1, output_classes), name='output')

示例#2

0

显示文件

文件： avnet.py 项目： konatasick/ip-avsr

def create_model(substreams,
                 mask_shape,
                 mask_var,
                 lstm_size=250,
                 output_classes=26,
                 fusiontype='concat',
                 w_init_fn=las.init.Orthogonal(),
                 use_peepholes=True):
    """Fuse already-built substream layers and classify with an aggregate BLSTM.

    Args:
        substreams: list of layers to fuse.
        mask_shape: shape of the mask input layer.
        mask_var: variable bound to the mask input layer.
        lstm_size: size passed to ``create_blstm`` and used as the width of
            the flattened recurrent output.
        output_classes: number of units of the softmax layer.
        fusiontype: one of ``'adasum'``, ``'sum'`` or ``'concat'``.
        w_init_fn: initializer used for ``W_in`` and ``W_hid`` of every gate.
        use_peepholes: accepted but not used by this function.
            NOTE(review): it is not forwarded to ``create_blstm`` -- confirm
            whether that is intentional.

    Returns:
        Tuple ``(l_out, l_fuse)``: the output layer reshaped to
        ``(-1, seq_len, output_classes)`` (``seq_len`` being the symbolic
        second dimension of the mask variable) and the fusion layer.

    Raises:
        ValueError: if ``fusiontype`` is not one of the supported values.
    """
    gate_parameters = Gate(W_in=w_init_fn,
                           W_hid=w_init_fn,
                           b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=w_init_fn,
        W_hid=w_init_fn,
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None,
        b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)

    l_mask = InputLayer(mask_shape, mask_var, 'mask')
    symbolic_seqlen_raw = l_mask.input_var.shape[1]

    # Fuse the substreams; merge layers take a list of layers as input.
    if fusiontype == 'adasum':
        l_fuse = AdaptiveElemwiseSumLayer(substreams, name='adasum1')
    elif fusiontype == 'sum':
        l_fuse = ElemwiseSumLayer(substreams, name='sum1')
    elif fusiontype == 'concat':
        l_fuse = ConcatLayer(substreams, axis=-1, name='concat')
    else:
        # Without this branch l_fuse would be unbound and the failure would
        # surface later as an unrelated UnboundLocalError.
        raise ValueError(
            'Unsupported fusiontype: {!r}'.format(fusiontype))

    f_lstm_agg, b_lstm_agg = create_blstm(l_fuse, l_mask, lstm_size,
                                          cell_parameters, gate_parameters,
                                          'lstm_agg')
    # Combine the forward and backward outputs by element-wise summation.
    l_sum2 = ElemwiseSumLayer([f_lstm_agg, b_lstm_agg], name='sum2')

    # Flatten to 2-D with lstm_size columns so the softmax layer is applied
    # to every row.
    l_reshape3 = ReshapeLayer(l_sum2, (-1, lstm_size), name='reshape3')

    # Per-row classification over the output classes.
    l_softmax = DenseLayer(l_reshape3,
                           num_units=output_classes,
                           nonlinearity=las.nonlinearities.softmax,
                           name='softmax')

    l_out = ReshapeLayer(l_softmax, (-1, symbolic_seqlen_raw, output_classes),
                         name='output')

    return l_out, l_fuse

示例#3

0

显示文件

def create_model(input_shape,
                 input_var,
                 mask_shape,
                 mask_var,
                 window,
                 lstm_size=250,
                 output_classes=26,
                 w_init=las.init.GlorotUniform(),
                 use_peepholes=False,
                 use_blstm=True):
    """Build a delta + (B)LSTM + per-row softmax network on raw inputs.

    Args:
        input_shape: shape of the input layer.
        input_var: variable bound to the input layer.
        mask_shape: shape of the mask input layer.
        mask_var: variable bound to the mask input layer.
        window: window argument forwarded to ``DeltaLayer``.
        lstm_size: size passed to ``create_blstm`` / ``create_lstm`` and used
            as the width of the flattened recurrent output.
        output_classes: number of units of the softmax layer.
        w_init: initializer used for ``W_in`` and ``W_hid`` of every gate.
        use_peepholes: forwarded to ``create_blstm`` / ``create_lstm``.
        use_blstm: build a bidirectional LSTM when true, a single LSTM otherwise.

    Returns:
        The output layer, reshaped to ``(-1, seq_len, output_classes)`` where
        ``seq_len`` is the symbolic second dimension of the input variable.
    """
    gate_parameters = Gate(W_in=w_init,
                           W_hid=w_init,
                           b=las.init.Constant(0.))
    # W_cell=None means no cell connection is used; tanh is the conventional
    # LSTM cell nonlinearity.
    cell_parameters = Gate(W_in=w_init,
                           W_hid=w_init,
                           W_cell=None,
                           b=las.init.Constant(0.),
                           nonlinearity=tanh)

    l_in = InputLayer(input_shape, input_var, 'input')
    l_mask = InputLayer(mask_shape, mask_var, name='mask')

    seq_dim = l_in.input_var.shape[1]
    l_delta = DeltaLayer(l_in, window, name='delta')

    if not use_blstm:
        l_recurrent = create_lstm(l_delta, l_mask, lstm_size, cell_parameters,
                                  gate_parameters, 'lstm', use_peepholes)
    else:
        l_forward, l_backward = create_blstm(l_delta, l_mask, lstm_size,
                                             cell_parameters, gate_parameters,
                                             'lstm', use_peepholes)
        # Merge the two directions by element-wise summation.
        l_recurrent = ElemwiseSumLayer([l_forward, l_backward], name='sum')

    # Flatten to 2-D with lstm_size columns so the softmax layer is applied
    # to every row.
    l_flat = ReshapeLayer(l_recurrent, (-1, lstm_size), name='reshape')
    l_softmax = DenseLayer(l_flat,
                           num_units=output_classes,
                           nonlinearity=las.nonlinearities.softmax,
                           name='softmax')

    return ReshapeLayer(l_softmax, (-1, seq_dim, output_classes),
                        name='output')

示例#4

0

显示文件

文件： deltanet.py 项目： lzuwei/ip-avsr

def create_model_using_pretrained_encoder(weights, biases, input_shape, input_var, mask_shape, mask_var,
                                          lstm_size=250, win=T.iscalar('theta'), output_classes=26,
                                          w_init_fn=las.init.Orthogonal(),
                                          use_peepholes=False, nonlinearities=rectify):
    """Build an encoder + delta + BLSTM network that emits one softmax per sequence.

    The input is flattened to 2-D, run through a pretrained four-layer
    encoder (sizes 2000/1000/500/50, the last layer linear), restored to 3-D,
    passed through a ``DeltaLayer`` and a bidirectional LSTM whose two
    directions are summed.  Index ``-1`` along axis 1 of that sum is sliced
    out and fed to a softmax dense layer.

    Args:
        weights: encoder weights handed to ``create_pretrained_encoder``.
        biases: encoder biases handed to ``create_pretrained_encoder``.
        input_shape: shape of the input layer; its last entry is used as the
            width of the flattened encoder input.
        input_var: variable bound to the input layer.
        mask_shape: shape of the mask input layer.
        mask_var: variable bound to the mask input layer.
        lstm_size: size passed to ``create_blstm``.
        win: window argument forwarded to ``DeltaLayer``.
        output_classes: number of units of the softmax layer.
        w_init_fn: initializer used for ``W_in`` and ``W_hid`` of every gate.
        use_peepholes: forwarded to ``create_blstm``.
        nonlinearities: nonlinearity used for the first three encoder layers.

    Returns:
        The softmax output layer.
    """
    gate_parameters = Gate(W_in=w_init_fn,
                           W_hid=w_init_fn,
                           b=las.init.Constant(0.))
    # W_cell=None means no cell connection is used; tanh is the conventional
    # LSTM cell nonlinearity.
    cell_parameters = Gate(W_in=w_init_fn,
                           W_hid=w_init_fn,
                           W_cell=None,
                           b=las.init.Constant(0.),
                           nonlinearity=tanh)

    l_in = InputLayer(input_shape, input_var, 'input')
    l_mask = InputLayer(mask_shape, mask_var, 'mask')

    # Symbolic sizes of the first two input dimensions, needed to undo the
    # flattening once the encoder has been applied.
    dim0, dim1 = l_in.input_var.shape[0], l_in.input_var.shape[1]

    encoder_sizes = [2000, 1000, 500, 50]
    encoder_nonlinearities = [nonlinearities] * 3 + [linear]
    encoder_layer_names = ['fc1', 'fc2', 'fc3', 'bottleneck']

    l_flat_in = ReshapeLayer(l_in, (-1, input_shape[-1]), name='reshape1')
    l_encoder = create_pretrained_encoder(l_flat_in, weights, biases,
                                          encoder_sizes,
                                          encoder_nonlinearities,
                                          encoder_layer_names)
    feature_len = las.layers.get_output_shape(l_encoder)[-1]
    l_sequence = ReshapeLayer(l_encoder, (dim0, dim1, feature_len), name='reshape2')
    l_delta = DeltaLayer(l_sequence, win, name='delta')

    l_forward, l_backward = create_blstm(l_delta, l_mask, lstm_size,
                                         cell_parameters, gate_parameters,
                                         'bstm1', use_peepholes)

    # Merge the two directions by element-wise summation, then keep only
    # index -1 along axis 1.
    l_merged = ElemwiseSumLayer([l_forward, l_backward], name='sum1')
    l_last = SliceLayer(l_merged, -1, 1, name='slice1')

    return DenseLayer(l_last,
                      num_units=output_classes,
                      nonlinearity=las.nonlinearities.softmax,
                      name='output')

示例#5

0

显示文件

文件： adenet_3stream_dropout.py 项目： redforg/end-to-end-multiview-lipreading

def create_model(s1_ae,
                 s2_ae,
                 s3_ae,
                 s1_shape,
                 s1_var,
                 s2_shape,
                 s2_var,
                 s3_shape,
                 s3_var,
                 mask_shape,
                 mask_var,
                 lstm_size=250,
                 lstm2_size=250,
                 win=T.iscalar('theta)'),
                 output_classes=26,
                 fusiontype='concat',
                 w_init_fn=las.init.Orthogonal(),
                 use_peepholes=True):
    """Build the three-stream encoder/LSTM network with dropout and BLSTM fusion.

    Each stream is flattened to 2-D, run through its pretrained encoder,
    restored to 3-D, passed through a ``DeltaLayer`` and a ``DropoutLayer``
    and then through its own LSTM with ``lstm_size * 2`` units.  The three
    LSTM outputs are fused, dropped out, and fed to an aggregate
    bidirectional LSTM whose directions are summed before the per-row
    softmax.

    Args:
        s1_ae, s2_ae, s3_ae: per-stream 4-tuples
            ``(weights, biases, shapes, nonlinearities)`` handed to
            ``create_pretrained_encoder``.
        s1_shape, s2_shape, s3_shape: shapes of the stream input layers; the
            last entry of each is used as the width of the flattened input.
        s1_var, s2_var, s3_var: variables bound to the stream input layers.
        mask_shape: shape of the mask input layer.
        mask_var: variable bound to the mask input layer.
        lstm_size: half the number of units of each per-stream LSTM.
        lstm2_size: size passed to ``create_blstm`` for the aggregate BLSTM.
        win: window argument forwarded to every ``DeltaLayer``.
        output_classes: number of units of the softmax layer.
        fusiontype: one of ``'adasum'``, ``'sum'`` or ``'concat'``.
        w_init_fn: initializer used for ``W_in`` and ``W_hid`` of every gate.
        use_peepholes: ``peepholes`` setting of the per-stream LSTMs (it is
            not forwarded to ``create_blstm``).

    Returns:
        Tuple ``(l_out, l_fuse)``: the output layer reshaped to
        ``(-1, seq_len, output_classes)`` (``seq_len`` being the symbolic
        second dimension of the first stream's input variable) and the
        fusion layer (before dropout).

    Raises:
        ValueError: if ``fusiontype`` is not one of the supported values.
    """
    gate_parameters = Gate(W_in=w_init_fn,
                           W_hid=w_init_fn,
                           b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=w_init_fn,
        W_hid=w_init_fn,
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None,
        b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)

    l_s1 = InputLayer(s1_shape, s1_var, 's1_im')
    l_mask = InputLayer(mask_shape, mask_var, 'mask')
    l_s2 = InputLayer(s2_shape, s2_var, 's2_im')
    l_s3 = InputLayer(s3_shape, s3_var, 's3_im')

    symbolic_seqlen_s1 = l_s1.input_var.shape[1]

    streams = [('s1', l_s1, s1_shape, s1_ae),
               ('s2', l_s2, s2_shape, s2_ae),
               ('s3', l_s3, s3_shape, s3_ae)]

    # Per-stream front end: flatten -> pretrained encoder -> restore the
    # first two dimensions -> delta -> dropout.
    stream_features = []
    for tag, l_stream, stream_shape, stream_ae in streams:
        weights, biases, shapes, nonlinearities = stream_ae
        symbolic_batchsize = l_stream.input_var.shape[0]
        symbolic_seqlen = l_stream.input_var.shape[1]

        l_flat = ReshapeLayer(l_stream, (-1, stream_shape[-1]),
                              name='reshape1_' + tag)
        l_encoder = create_pretrained_encoder(
            l_flat, weights, biases, shapes, nonlinearities,
            ['fc1_' + tag, 'fc2_' + tag, 'fc3_' + tag, 'bottleneck_' + tag])
        encoder_len = las.layers.get_output_shape(l_encoder)[-1]
        l_sequence = ReshapeLayer(
            l_encoder, (symbolic_batchsize, symbolic_seqlen, encoder_len),
            name='reshape2_' + tag)
        l_delta = DeltaLayer(l_sequence, win, name='delta_' + tag)
        l_dropout = DropoutLayer(l_delta, name='dropout_' + tag)
        stream_features.append((tag, l_dropout))

    # Settings shared by the three per-stream LSTMs: a separate mask input,
    # the same gate specs for every gate, learned initial state and gradient
    # clipping.
    lstm_kwargs = dict(peepholes=use_peepholes,
                       mask_input=l_mask,
                       ingate=gate_parameters,
                       forgetgate=gate_parameters,
                       cell=cell_parameters,
                       outgate=gate_parameters,
                       learn_init=True,
                       grad_clipping=5.)
    stream_lstms = [
        LSTMLayer(l_features, lstm_size * 2, name='lstm_' + tag, **lstm_kwargs)
        for tag, l_features in stream_features]

    # Fuse the per-stream LSTM outputs; merge layers take a list of layers.
    if fusiontype == 'adasum':
        l_fuse = AdaptiveElemwiseSumLayer(stream_lstms, name='adasum1')
    elif fusiontype == 'sum':
        l_fuse = ElemwiseSumLayer(stream_lstms, name='sum1')
    elif fusiontype == 'concat':
        l_fuse = ConcatLayer(stream_lstms, axis=-1, name='concat')
    else:
        # Without this branch l_fuse would be unbound and the failure would
        # surface later as an unrelated UnboundLocalError.
        raise ValueError(
            'Unsupported fusiontype: {!r}'.format(fusiontype))

    l_fuse_dropout = DropoutLayer(l_fuse, name='concat_dropout')
    f_lstm_agg, b_lstm_agg = create_blstm(l_fuse_dropout, l_mask, lstm2_size,
                                          cell_parameters, gate_parameters,
                                          'lstm_agg')
    # Combine the forward and backward outputs by element-wise summation.
    l_sum2 = ElemwiseSumLayer([f_lstm_agg, b_lstm_agg], name='sum2')

    # Flatten to 2-D using the aggregate BLSTM size.  The BLSTM is built with
    # lstm2_size, so flattening with lstm_size * 2 (as before) would merge
    # rows whenever the two differ -- assuming create_blstm emits outputs of
    # the requested size.
    l_reshape3 = ReshapeLayer(l_sum2, (-1, lstm2_size), name='reshape3')

    # Per-row classification over the output classes.
    l_softmax = DenseLayer(l_reshape3,
                           num_units=output_classes,
                           nonlinearity=las.nonlinearities.softmax,
                           name='softmax')

    l_out = ReshapeLayer(l_softmax, (-1, symbolic_seqlen_s1, output_classes),
                         name='output')

    return l_out, l_fuse

示例#6

0

显示文件

文件： adenet_v2.py 项目： konatasick/ip-avsr

def create_model(dbn,
                 input_shape,
                 input_var,
                 mask_shape,
                 mask_var,
                 dct_shape,
                 dct_var,
                 lstm_size=250,
                 win=T.iscalar('theta)'),
                 output_classes=26,
                 fusiontype='sum',
                 w_init_fn=las.init.GlorotUniform(),
                 use_peepholes=False,
                 nonlinearities=rectify):
    """Build the two-stream (encoder features + DCT) LSTM network with BLSTM fusion.

    The first stream is flattened to 2-D, run through the pretrained encoder
    described by ``dbn``, restored to 3-D and passed through a ``DeltaLayer``;
    the DCT stream goes straight through a ``DeltaLayer``.  Each stream has
    its own LSTM; their outputs are fused and fed to an aggregate
    bidirectional LSTM whose directions are summed before the per-row softmax.

    Args:
        dbn: 4-tuple ``(weights, biases, shapes, nonlinearities)`` handed to
            ``create_pretrained_encoder``.
        input_shape: shape of the first input layer; its last entry is used as
            the width of the flattened encoder input.
        input_var: variable bound to the first input layer.
        mask_shape: shape of the mask input layer.
        mask_var: variable bound to the mask input layer.
        dct_shape: shape of the DCT input layer.
        dct_var: variable bound to the DCT input layer.
        lstm_size: number of units of the per-stream LSTMs, also passed to
            ``create_blstm`` and used as the width of the flattened output.
        win: window argument forwarded to both ``DeltaLayer`` instances.
        output_classes: number of units of the softmax layer.
        fusiontype: one of ``'sum'``, ``'adasum'`` or ``'concat'``.
        w_init_fn: initializer used for ``W_in`` and ``W_hid`` of every gate.
        use_peepholes: ``peepholes`` setting of the per-stream LSTMs (it is
            not forwarded to ``create_blstm``).
        nonlinearities: kept for interface compatibility only; the encoder
            nonlinearities always come from ``dbn``.

    Returns:
        Tuple ``(l_out, l_fuse)``: the output layer reshaped to
        ``(-1, seq_len, output_classes)`` (``seq_len`` being the symbolic
        second dimension of the first input variable) and the fusion layer.

    Raises:
        ValueError: if ``fusiontype`` is not one of the supported values.
    """
    # Unpack under a distinct name so the (unused) ``nonlinearities``
    # parameter is not silently shadowed.
    weights, biases, shapes, encoder_nonlinearities = dbn
    names = ['fc1', 'fc2', 'fc3', 'bottleneck']

    gate_parameters = Gate(W_in=w_init_fn,
                           W_hid=w_init_fn,
                           b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=w_init_fn,
        W_hid=w_init_fn,
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None,
        b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)

    l_in = InputLayer(input_shape, input_var, 'input')
    l_mask = InputLayer(mask_shape, mask_var, 'mask')
    l_dct = InputLayer(dct_shape, dct_var, 'dct')

    symbolic_batchsize = l_in.input_var.shape[0]
    symbolic_seqlen = l_in.input_var.shape[1]

    l_reshape1 = ReshapeLayer(l_in, (-1, input_shape[-1]), name='reshape1')
    l_encoder = create_pretrained_encoder(l_reshape1, weights, biases, shapes,
                                          encoder_nonlinearities, names)
    encoder_len = las.layers.get_output_shape(l_encoder)[-1]
    l_reshape2 = ReshapeLayer(
        l_encoder, (symbolic_batchsize, symbolic_seqlen, encoder_len),
        name='reshape2')
    l_delta = DeltaLayer(l_reshape2, win, name='delta')

    l_delta_dct = DeltaLayer(l_dct, win, name='delta_dct')

    # Settings shared by both per-stream LSTMs: a separate mask input, the
    # same gate specs for every gate, learned initial state and gradient
    # clipping.
    lstm_kwargs = dict(peepholes=use_peepholes,
                       mask_input=l_mask,
                       ingate=gate_parameters,
                       forgetgate=gate_parameters,
                       cell=cell_parameters,
                       outgate=gate_parameters,
                       learn_init=True,
                       grad_clipping=5.)
    l_lstm_bn = LSTMLayer(l_delta, lstm_size, name='lstm_bn', **lstm_kwargs)
    l_lstm_dct = LSTMLayer(l_delta_dct, lstm_size, name='lstm_dct',
                           **lstm_kwargs)

    # Fuse the two streams; merge layers take a list of layers as input.
    if fusiontype == 'sum':
        l_fuse = ElemwiseSumLayer([l_lstm_bn, l_lstm_dct], name='sum1')
    elif fusiontype == 'adasum':
        l_fuse = AdaptiveElemwiseSumLayer([l_lstm_bn, l_lstm_dct],
                                          name='adasum')
    elif fusiontype == 'concat':
        l_fuse = ConcatLayer([l_lstm_bn, l_lstm_dct], axis=2, name='concat')
    else:
        # Built-in exceptions take no keyword arguments; passing message=...
        # would itself raise TypeError and hide the real problem.
        raise ValueError('Unsupported Fusion Type used!')

    f_lstm_agg, b_lstm_agg = create_blstm(l_fuse, l_mask, lstm_size,
                                          cell_parameters, gate_parameters,
                                          'lstm_agg')

    # Combine the forward and backward outputs by element-wise summation.
    l_sum2 = ElemwiseSumLayer([f_lstm_agg, b_lstm_agg], name='sum2')

    # Flatten to 2-D with lstm_size columns so the softmax layer is applied
    # to every row.
    l_reshape3 = ReshapeLayer(l_sum2, (-1, lstm_size), name='reshape3')

    # Per-row classification over the output classes.
    l_softmax = DenseLayer(l_reshape3,
                           num_units=output_classes,
                           nonlinearity=las.nonlinearities.softmax,
                           name='softmax')

    l_out = ReshapeLayer(l_softmax, (-1, symbolic_seqlen, output_classes),
                         name='output')

    return l_out, l_fuse

示例#7

0

显示文件

文件： adenet_3stream.py 项目： behtak/ip-avsr

def create_model(s1_ae, s2_ae, s3_ae, s1_shape, s1_var,
                 s2_shape, s2_var, s3_shape, s3_var,
                 mask_shape, mask_var,
                 lstm_size=250, win=T.iscalar('theta)'),
                 output_classes=26, fusiontype='concat', w_init_fn=las.init.Orthogonal(),
                 use_peepholes=True):
    """Build the three-stream encoder/LSTM network with BLSTM fusion.

    Each stream is flattened to 2-D, run through its pretrained encoder,
    restored to 3-D, passed through a ``DeltaLayer`` and then through its own
    LSTM.  The three LSTM outputs are fused and fed to an aggregate
    bidirectional LSTM whose directions are summed before the per-row softmax.

    Args:
        s1_ae, s2_ae, s3_ae: per-stream 4-tuples
            ``(weights, biases, shapes, nonlinearities)`` handed to
            ``create_pretrained_encoder``.
        s1_shape, s2_shape, s3_shape: shapes of the stream input layers; the
            last entry of each is used as the width of the flattened input.
        s1_var, s2_var, s3_var: variables bound to the stream input layers.
        mask_shape: shape of the mask input layer.
        mask_var: variable bound to the mask input layer.
        lstm_size: number of units of the per-stream LSTMs, also passed to
            ``create_blstm`` and used as the width of the flattened output.
        win: window argument forwarded to every ``DeltaLayer``.
        output_classes: number of units of the softmax layer.
        fusiontype: one of ``'adasum'``, ``'sum'`` or ``'concat'``.
        w_init_fn: initializer used for ``W_in`` and ``W_hid`` of every gate.
        use_peepholes: ``peepholes`` setting of the per-stream LSTMs (it is
            not forwarded to ``create_blstm``).

    Returns:
        Tuple ``(l_out, l_fuse)``: the output layer reshaped to
        ``(-1, seq_len, output_classes)`` (``seq_len`` being the symbolic
        second dimension of the first stream's input variable) and the
        fusion layer.

    Raises:
        ValueError: if ``fusiontype`` is not one of the supported values.
    """
    gate_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn,
        b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn,
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None, b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)

    l_s1 = InputLayer(s1_shape, s1_var, 's1_im')
    l_mask = InputLayer(mask_shape, mask_var, 'mask')
    l_s2 = InputLayer(s2_shape, s2_var, 's2_im')
    l_s3 = InputLayer(s3_shape, s3_var, 's3_im')

    symbolic_seqlen_s1 = l_s1.input_var.shape[1]

    streams = [('s1', l_s1, s1_shape, s1_ae),
               ('s2', l_s2, s2_shape, s2_ae),
               ('s3', l_s3, s3_shape, s3_ae)]

    # Per-stream front end: flatten -> pretrained encoder -> restore the
    # first two dimensions -> delta.
    l_delta = {}
    for tag, l_stream, stream_shape, stream_ae in streams:
        weights, biases, shapes, nonlinearities = stream_ae
        symbolic_batchsize = l_stream.input_var.shape[0]
        symbolic_seqlen = l_stream.input_var.shape[1]

        l_flat = ReshapeLayer(l_stream, (-1, stream_shape[-1]), name='reshape1_' + tag)
        l_encoder = create_pretrained_encoder(
            l_flat, weights, biases, shapes, nonlinearities,
            ['fc1_' + tag, 'fc2_' + tag, 'fc3_' + tag, 'bottleneck_' + tag])
        encoder_len = las.layers.get_output_shape(l_encoder)[-1]
        l_sequence = ReshapeLayer(l_encoder,
                                  (symbolic_batchsize, symbolic_seqlen, encoder_len),
                                  name='reshape2_' + tag)
        l_delta[tag] = DeltaLayer(l_sequence, win, name='delta_' + tag)

    # Settings shared by the three per-stream LSTMs: a separate mask input,
    # the same gate specs for every gate, learned initial state and gradient
    # clipping.
    lstm_kwargs = dict(peepholes=use_peepholes,
                       mask_input=l_mask,
                       ingate=gate_parameters, forgetgate=gate_parameters,
                       cell=cell_parameters, outgate=gate_parameters,
                       learn_init=True, grad_clipping=5.)
    # The int() cast on the first stream only is kept from the original code.
    l_lstm_s1 = LSTMLayer(l_delta['s1'], int(lstm_size), name='lstm_s1', **lstm_kwargs)
    l_lstm_s2 = LSTMLayer(l_delta['s2'], lstm_size, name='lstm_s2', **lstm_kwargs)
    l_lstm_s3 = LSTMLayer(l_delta['s3'], lstm_size, name='lstm_s3', **lstm_kwargs)

    # Fuse the per-stream LSTM outputs; merge layers take a list of layers.
    if fusiontype == 'adasum':
        l_fuse = AdaptiveElemwiseSumLayer([l_lstm_s1, l_lstm_s2, l_lstm_s3], name='adasum1')
    elif fusiontype == 'sum':
        l_fuse = ElemwiseSumLayer([l_lstm_s1, l_lstm_s2, l_lstm_s3], name='sum1')
    elif fusiontype == 'concat':
        l_fuse = ConcatLayer([l_lstm_s1, l_lstm_s2, l_lstm_s3], axis=-1, name='concat')
    else:
        # Without this branch l_fuse would be unbound and the failure would
        # surface later as an unrelated UnboundLocalError.
        raise ValueError('Unsupported fusiontype: {!r}'.format(fusiontype))

    f_lstm_agg, b_lstm_agg = create_blstm(l_fuse, l_mask, lstm_size, cell_parameters, gate_parameters, 'lstm_agg')
    # Combine the forward and backward outputs by element-wise summation.
    l_sum2 = ElemwiseSumLayer([f_lstm_agg, b_lstm_agg], name='sum2')

    # Flatten to 2-D with lstm_size columns so the softmax layer is applied
    # to every row.
    l_reshape3 = ReshapeLayer(l_sum2, (-1, lstm_size), name='reshape3')

    # Per-row classification over the output classes.
    l_softmax = DenseLayer(
        l_reshape3, num_units=output_classes,
        nonlinearity=las.nonlinearities.softmax, name='softmax')

    l_out = ReshapeLayer(l_softmax, (-1, symbolic_seqlen_s1, output_classes), name='output')

    return l_out, l_fuse

示例#8

0

显示文件

文件： adenet_v2.py 项目： behtak/ip-avsr

def create_model(dbn, input_shape, input_var, mask_shape, mask_var,
                 dct_shape, dct_var, lstm_size=250, win=T.iscalar('theta)'),
                 output_classes=26, fusiontype='sum', w_init_fn=las.init.GlorotUniform(),
                 use_peepholes=False, nonlinearities=rectify):
    """Build the two-stream (encoder features + DCT) LSTM network with BLSTM fusion.

    The first stream is flattened to 2-D, run through the pretrained encoder
    described by ``dbn``, restored to 3-D and passed through a ``DeltaLayer``;
    the DCT stream goes straight through a ``DeltaLayer``.  Each stream has
    its own LSTM; their outputs are fused and fed to an aggregate
    bidirectional LSTM whose directions are summed before the per-row softmax.

    Args:
        dbn: 4-tuple ``(weights, biases, shapes, nonlinearities)`` handed to
            ``create_pretrained_encoder``.
        input_shape: shape of the first input layer; its last entry is used as
            the width of the flattened encoder input.
        input_var: variable bound to the first input layer.
        mask_shape: shape of the mask input layer.
        mask_var: variable bound to the mask input layer.
        dct_shape: shape of the DCT input layer.
        dct_var: variable bound to the DCT input layer.
        lstm_size: number of units of the per-stream LSTMs, also passed to
            ``create_blstm`` and used as the width of the flattened output.
        win: window argument forwarded to both ``DeltaLayer`` instances.
        output_classes: number of units of the softmax layer.
        fusiontype: one of ``'sum'``, ``'adasum'`` or ``'concat'``.
        w_init_fn: initializer used for ``W_in`` and ``W_hid`` of every gate.
        use_peepholes: ``peepholes`` setting of the per-stream LSTMs (it is
            not forwarded to ``create_blstm``).
        nonlinearities: kept for interface compatibility only; the encoder
            nonlinearities always come from ``dbn``.

    Returns:
        Tuple ``(l_out, l_fuse)``: the output layer reshaped to
        ``(-1, seq_len, output_classes)`` (``seq_len`` being the symbolic
        second dimension of the first input variable) and the fusion layer.

    Raises:
        ValueError: if ``fusiontype`` is not one of the supported values.
    """
    # Unpack under a distinct name so the (unused) ``nonlinearities``
    # parameter is not silently shadowed.
    weights, biases, shapes, encoder_nonlinearities = dbn
    names = ['fc1', 'fc2', 'fc3', 'bottleneck']

    gate_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn,
        b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn,
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None, b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)

    l_in = InputLayer(input_shape, input_var, 'input')
    l_mask = InputLayer(mask_shape, mask_var, 'mask')
    l_dct = InputLayer(dct_shape, dct_var, 'dct')

    symbolic_batchsize = l_in.input_var.shape[0]
    symbolic_seqlen = l_in.input_var.shape[1]

    l_reshape1 = ReshapeLayer(l_in, (-1, input_shape[-1]), name='reshape1')
    l_encoder = create_pretrained_encoder(l_reshape1, weights, biases, shapes, encoder_nonlinearities, names)
    encoder_len = las.layers.get_output_shape(l_encoder)[-1]
    l_reshape2 = ReshapeLayer(l_encoder, (symbolic_batchsize, symbolic_seqlen, encoder_len), name='reshape2')
    l_delta = DeltaLayer(l_reshape2, win, name='delta')

    l_delta_dct = DeltaLayer(l_dct, win, name='delta_dct')

    # Settings shared by both per-stream LSTMs: a separate mask input, the
    # same gate specs for every gate, learned initial state and gradient
    # clipping.
    lstm_kwargs = dict(peepholes=use_peepholes,
                       mask_input=l_mask,
                       ingate=gate_parameters, forgetgate=gate_parameters,
                       cell=cell_parameters, outgate=gate_parameters,
                       learn_init=True, grad_clipping=5.)
    l_lstm_bn = LSTMLayer(l_delta, lstm_size, name='lstm_bn', **lstm_kwargs)
    l_lstm_dct = LSTMLayer(l_delta_dct, lstm_size, name='lstm_dct', **lstm_kwargs)

    # Fuse the two streams; merge layers take a list of layers as input.
    if fusiontype == 'sum':
        l_fuse = ElemwiseSumLayer([l_lstm_bn, l_lstm_dct], name='sum1')
    elif fusiontype == 'adasum':
        l_fuse = AdaptiveElemwiseSumLayer([l_lstm_bn, l_lstm_dct], name='adasum')
    elif fusiontype == 'concat':
        l_fuse = ConcatLayer([l_lstm_bn, l_lstm_dct], axis=2, name='concat')
    else:
        # Built-in exceptions take no keyword arguments; passing message=...
        # would itself raise TypeError and hide the real problem.
        raise ValueError('Unsupported Fusion Type used!')

    f_lstm_agg, b_lstm_agg = create_blstm(l_fuse, l_mask, lstm_size, cell_parameters, gate_parameters, 'lstm_agg')

    # Combine the forward and backward outputs by element-wise summation.
    l_sum2 = ElemwiseSumLayer([f_lstm_agg, b_lstm_agg], name='sum2')

    # Flatten to 2-D with lstm_size columns so the softmax layer is applied
    # to every row.
    l_reshape3 = ReshapeLayer(l_sum2, (-1, lstm_size), name='reshape3')

    # Per-row classification over the output classes.
    l_softmax = DenseLayer(
        l_reshape3, num_units=output_classes, nonlinearity=las.nonlinearities.softmax, name='softmax')

    l_out = ReshapeLayer(l_softmax, (-1, symbolic_seqlen, output_classes), name='output')

    return l_out, l_fuse

示例#9

0

显示文件

文件： adenet_5stream.py 项目： redforg/end-to-end-multiview-lipreading

def create_pretrained_model(s1_ae, s1_lstm,
                            s2_ae, s2_lstm,
                            s3_ae, s3_lstm,
                            s4_ae, s4_lstm,
                            s5_ae, s5_lstm,
                            s1_shape, s1_var,
                            s2_shape, s2_var,
                            s3_shape, s3_var,
                            s4_shape, s4_var,
                            s5_shape, s5_var,
                            mask_shape, mask_var,
                            lstm_size=250, win=T.iscalar('theta)'),
                            output_classes=26, fusiontype='concat', w_init_fn=las.init.Orthogonal(),
                            use_peepholes=True, use_blstm_substream=False):
    """
    builds a 5-stream network: one pretrained encoder + delta layer + pretrained
    LSTM per stream, a fusion layer over the five stream outputs, an aggregation
    BLSTM on top of the fusion and a per-timestep softmax.

    :param s1_ae ... s5_ae: per-stream 4-tuples, unpacked as
        (weights, biases, shapes, nonlinearities) and handed to create_pretrained_encoder
    :param s1_lstm ... s5_lstm: per-stream pretrained LSTM parameters, handed to
        create_pretrained_lstm together with the key 'f_lstm' (and 'b_lstm' when
        use_blstm_substream is set)
    :param s1_shape ... s5_shape: per-stream input shapes; the last entry is used as the
        per-frame feature length when flattening the input
    :param s1_var ... s5_var: per-stream input theano variables
    :param mask_shape: mask shape
    :param mask_var: mask theano variable
    :param lstm_size: number of lstm units, also the width of the final reshape before softmax
    :param win: window theano variable passed to every DeltaLayer
    :param output_classes: number of softmax output units
    :param fusiontype: how stream outputs are merged: 'adasum', 'sum' or 'concat'
    :param w_init_fn: initializer used for W_in / W_hid of the gate and cell parameters
    :param use_peepholes: forwarded to create_pretrained_lstm
    :param use_blstm_substream: if true, each stream uses a forward and a backward
        pretrained LSTM whose outputs are summed; otherwise only the forward LSTM is used
    :return: tuple (output layer, fusion layer)
    :raises ValueError: if fusiontype is not one of the supported values
    """
    # Fail fast: without this check an unknown fusiontype would only surface as an
    # unbound `l_fuse` after the whole per-stream graph had already been built.
    supported_fusiontypes = ('adasum', 'sum', 'concat')
    if fusiontype not in supported_fusiontypes:
        raise ValueError('Unsupported Fusion Type used: %r (expected one of %s)'
                         % (fusiontype, ', '.join(supported_fusiontypes)))

    gate_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn,
        b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn,
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None, b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)

    # One row per stream: (name suffix, autoencoder params, lstm params, input shape, input var).
    stream_specs = (
        ('s1', s1_ae, s1_lstm, s1_shape, s1_var),
        ('s2', s2_ae, s2_lstm, s2_shape, s2_var),
        ('s3', s3_ae, s3_lstm, s3_shape, s3_var),
        ('s4', s4_ae, s4_lstm, s4_shape, s4_var),
        ('s5', s5_ae, s5_lstm, s5_shape, s5_var),
    )

    l_mask = InputLayer(mask_shape, mask_var, 'mask')

    # Stage 1: input -> flatten -> pretrained encoder -> back to sequences -> delta,
    # for every stream. All encoders are built before any LSTM.
    delta_layers = []
    symbolic_seqlen_s1 = None
    for tag, ae, _, shape, var in stream_specs:
        weights, biases, shapes, nonlinearities = ae
        l_in = InputLayer(shape, var, tag + '_im')
        symbolic_batchsize = l_in.input_var.shape[0]
        symbolic_seqlen = l_in.input_var.shape[1]
        if symbolic_seqlen_s1 is None:
            # The final output reshape uses the sequence length of the first stream.
            symbolic_seqlen_s1 = symbolic_seqlen

        l_reshape1 = ReshapeLayer(l_in, (-1, shape[-1]), name='reshape1_' + tag)
        l_encoder = create_pretrained_encoder(l_reshape1, weights, biases, shapes,
                                              nonlinearities,
                                              ['fc1_' + tag, 'fc2_' + tag, 'fc3_' + tag,
                                               'bottleneck_' + tag])
        encoder_len = las.layers.get_output_shape(l_encoder)[-1]
        l_reshape2 = ReshapeLayer(l_encoder,
                                  (symbolic_batchsize, symbolic_seqlen, encoder_len),
                                  name='reshape2_' + tag)
        delta_layers.append(DeltaLayer(l_reshape2, win, name='delta_' + tag))

    # Stage 2: pretrained (B)LSTM per stream, forward layer created before backward.
    stream_outputs = []
    for (tag, _, lstm_params, _, _), l_delta in zip(stream_specs, delta_layers):
        f_lstm = create_pretrained_lstm(lstm_params, 'f_lstm', l_delta,
                                        l_mask, lstm_size, cell_parameters, gate_parameters,
                                        'f_lstm_' + tag, use_peepholes)
        if not use_blstm_substream:
            stream_outputs.append(f_lstm)
            continue
        b_lstm = create_pretrained_lstm(lstm_params, 'b_lstm', l_delta,
                                        l_mask, lstm_size, cell_parameters, gate_parameters,
                                        'b_lstm_' + tag, use_peepholes, backwards=True)
        stream_outputs.append(ElemwiseSumLayer([f_lstm, b_lstm], name='sum_b_lstm_' + tag))

    # Fuse the five stream outputs into a single layer.
    # Merge layers take in lists of layers to merge as input.
    if fusiontype == 'adasum':
        l_fuse = AdaptiveElemwiseSumLayer(stream_outputs, name='adasum1')
    elif fusiontype == 'sum':
        l_fuse = ElemwiseSumLayer(stream_outputs, name='sum1')
    else:  # 'concat' -- the only remaining value after the validation above
        l_fuse = ConcatLayer(stream_outputs, axis=-1, name='concat')

    # Aggregation BLSTM over the fused streams; forward and backward outputs are summed.
    f_lstm_agg, b_lstm_agg = create_blstm(l_fuse, l_mask, lstm_size, cell_parameters, gate_parameters, 'lstm_agg')
    l_sum2 = ElemwiseSumLayer([f_lstm_agg, b_lstm_agg], name='sum2')

    # reshape to (num_examples * seq_len, lstm_size)
    l_reshape3 = ReshapeLayer(l_sum2, (-1, lstm_size), name='reshape3')

    # Now, we can apply feed-forward layers as usual.
    # The softmax is applied to every timestep, with one unit per output class.
    l_softmax = DenseLayer(
        l_reshape3, num_units=output_classes,
        nonlinearity=las.nonlinearities.softmax, name='softmax')

    l_out = ReshapeLayer(l_softmax, (-1, symbolic_seqlen_s1, output_classes), name='output')

    return l_out, l_fuse

示例#10

0

显示文件

文件： deltanet_majority_vote.py 项目： redforg/end-to-end-multiview-lipreading

def load_saved_model(model_path, stream_params, input_shape, input_var, mask_shape, mask_var,
                     lstm_size=250, win=T.iscalar('theta)'),
                     output_classes=26, w_init_fn=GlorotUniform(), use_peepholes=False, use_blstm=True):
    """
    rebuilds the single-stream network (encoder -> delta -> (b)lstm -> softmax)
    and then calls load_model_params on it with model_path
    :param model_path: path to model parameters, passed to load_model_params
    :param stream_params: stream parameters in a tuple of
    ([layer 1 dimension, ..., layer N dimension], [layer 1 nonlinearity, ..., layer N nonlinearity])
    :param input_shape: input shape eg: (None, None, 1500)
    :param input_var: input theano variable
    :param mask_shape: mask shape eg: (None, None) if variable lengths
    :param mask_var: mask theano variable
    :param lstm_size: number of lstm units for lstm layer
    :param win: window theano variable
    :param output_classes: number of output classes
    :param w_init_fn: weight initialization function used for initializing model
    :param use_peepholes: use peepholes for lstm layers
    :param use_blstm: if true, use create_blstm and sum its two outputs,
    otherwise use a single create_lstm layer
    :return: output layer of the network, after load_model_params has been called on it
    """

    layer_dims, layer_nonlinearities = stream_params

    # W_in / W_hid initializers are common to the gates and the cell.
    recurrent_init = dict(W_in=w_init_fn, W_hid=w_init_fn)
    gate_parameters = Gate(b=las.init.Constant(0.), **recurrent_init)
    # W_cell=None means no cell connection; tanh is the conventional
    # LSTM cell nonlinearity.
    cell_parameters = Gate(W_cell=None, b=las.init.Constant(0.),
                           nonlinearity=tanh, **recurrent_init)

    input_layer = InputLayer(input_shape, input_var, 'input')
    mask_layer = InputLayer(mask_shape, mask_var, 'mask')

    n_batch = input_layer.input_var.shape[0]
    n_steps = input_layer.input_var.shape[1]

    # Flatten the leading axes so the encoder sees one frame per row,
    # then restore the (batch, steps, features) layout.
    flat_frames = ReshapeLayer(input_layer, (-1, input_shape[-1]), name='reshape1')
    encoder = create_encoder(flat_frames, layer_dims, layer_nonlinearities,
                             ['fc1', 'fc2', 'fc3', 'bottleneck'])
    bottleneck_len = las.layers.get_output_shape(encoder)[-1]
    sequences = ReshapeLayer(encoder, (n_batch, n_steps, bottleneck_len), name='reshape2')
    delta = DeltaLayer(sequences, win, name='delta')

    if use_blstm:
        forward, backward = create_blstm(delta, mask_layer, lstm_size, cell_parameters,
                                         gate_parameters, 'lstm', use_peepholes)
        # Forward and backward outputs are merged by elementwise sum.
        recurrent_out = ElemwiseSumLayer([forward, backward], name='sum1')
    else:
        recurrent_out = create_lstm(delta, mask_layer, lstm_size, cell_parameters,
                                    gate_parameters, 'lstm', use_peepholes)

    # Flatten to 2 dimensions so the softmax runs on all timesteps.
    flat_steps = ReshapeLayer(recurrent_out, (-1, lstm_size), name='reshape3')
    softmax = DenseLayer(flat_steps, num_units=output_classes,
                         nonlinearity=las.nonlinearities.softmax, name='softmax')

    network = ReshapeLayer(softmax, (-1, n_steps, output_classes), name='output')
    load_model_params(network, model_path)
    return network