def create_model(dbn, input_shape, input_var, mask_shape, mask_var,
                 lstm_size=250, win=T.iscalar('theta'),
                 output_classes=26, w_init_fn=GlorotUniform(), use_peepholes=False, use_blstm=True):

    weights, biases, shapes, nonlinearities = dbn

    gate_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn,
        b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn,
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None, b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)

    l_in = InputLayer(input_shape, input_var, 'input')
    l_mask = InputLayer(mask_shape, mask_var, 'mask')

    symbolic_batchsize = l_in.input_var.shape[0]
    symbolic_seqlen = l_in.input_var.shape[1]

    l_reshape1 = ReshapeLayer(l_in, (-1, input_shape[-1]), name='reshape1')
    l_encoder = create_pretrained_encoder(l_reshape1, weights, biases,
                                          shapes,
                                          nonlinearities,
                                          ['fc1', 'fc2', 'fc3', 'bottleneck'])
    encoder_len = las.layers.get_output_shape(l_encoder)[-1]
    l_reshape2 = ReshapeLayer(l_encoder, (symbolic_batchsize, symbolic_seqlen, encoder_len), name='reshape2')
    l_delta = DeltaLayer(l_reshape2, win, name='delta')

    if use_blstm:
        l_lstm, l_lstm_back = create_blstm(l_delta, l_mask, lstm_size, cell_parameters, gate_parameters, 'blstm1',
                                           use_peepholes)

        # We'll combine the forward and backward layer output by summing.
        # Merge layers take in lists of layers to merge as input.
        l_sum1 = ElemwiseSumLayer([l_lstm, l_lstm_back], name='sum1')
        # reshape, flatten to 2 dimensions to run softmax on all timesteps
        l_reshape3 = ReshapeLayer(l_sum1, (-1, lstm_size), name='reshape3')
    else:
        l_lstm = create_lstm(l_delta, l_mask, lstm_size, cell_parameters, gate_parameters, 'lstm', use_peepholes)
        l_reshape3 = ReshapeLayer(l_lstm, (-1, lstm_size), name='reshape3')

    # Now, we can apply feed-forward layers as usual.
    # We want the network to predict a classification for the sequence,
    # so we'll use the number of classes as the number of output units.
    l_softmax = DenseLayer(
        l_reshape3, num_units=output_classes, nonlinearity=las.nonlinearities.softmax, name='softmax')

    l_out = ReshapeLayer(l_softmax, (-1, symbolic_seqlen, output_classes), name='output')

    return l_out
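# The snippets in this file assume imports along the following lines. This is a
# hedged reconstruction: the theano/lasagne names are real library imports, while
# DeltaLayer, AdaptiveElemwiseSumLayer, create_blstm, create_lstm,
# create_pretrained_lstm, create_pretrained_encoder and extract_weights are
# project-local helpers assumed to live in the surrounding package.
#
# import theano.tensor as T
# import lasagne as las
# from lasagne.init import GlorotUniform
# from lasagne.layers import (InputLayer, ReshapeLayer, DenseLayer, LSTMLayer, Gate,
#                             ElemwiseSumLayer, ConcatLayer, SliceLayer, DropoutLayer)
# from lasagne.nonlinearities import tanh, rectify, linear
#
# A minimal, hypothetical call of create_model above (the dbn tuple, the shapes
# and the feature dimension 1200 are assumptions, not taken from the original):
#
# input_var = T.tensor3('input')          # (batch, seq_len, features)
# mask_var = T.matrix('mask')             # (batch, seq_len)
# window = T.iscalar('theta')
# network = create_model(dbn, (None, None, 1200), input_var,
#                        (None, None), mask_var, lstm_size=250, win=window)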
def create_model_using_pretrained_encoder(weights, biases, input_shape, input_var, mask_shape, mask_var,
                                          lstm_size=250, win=T.iscalar('theta'), output_classes=26,
                                          w_init_fn=las.init.Orthogonal(),
                                          use_peepholes=False, nonlinearities=rectify):
    gate_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn,
        b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn,
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None, b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)

    l_in = InputLayer(input_shape, input_var, 'input')
    l_mask = InputLayer(mask_shape, mask_var, 'mask')

    symbolic_batchsize = l_in.input_var.shape[0]
    symbolic_seqlen = l_in.input_var.shape[1]

    l_reshape1 = ReshapeLayer(l_in, (-1, input_shape[-1]), name='reshape1')
    l_encoder = create_pretrained_encoder(l_reshape1, weights, biases,
                                          [2000, 1000, 500, 50],
                                          [nonlinearities, nonlinearities, nonlinearities, linear],
                                          ['fc1', 'fc2', 'fc3', 'bottleneck'])
    encoder_len = las.layers.get_output_shape(l_encoder)[-1]
    l_reshape2 = ReshapeLayer(l_encoder, (symbolic_batchsize, symbolic_seqlen, encoder_len), name='reshape2')
    l_delta = DeltaLayer(l_reshape2, win, name='delta')

    l_lstm, l_lstm_back = create_blstm(l_delta, l_mask, lstm_size, cell_parameters, gate_parameters, 'blstm1',
                                       use_peepholes)

    # We'll combine the forward and backward layer output by summing.
    # Merge layers take in lists of layers to merge as input.
    l_sum1 = ElemwiseSumLayer([l_lstm, l_lstm_back], name='sum1')

    # take the output at the last timestep only: one prediction per sequence
    l_forward_slice1 = SliceLayer(l_sum1, -1, 1, name='slice1')

    # Now, we can apply feed-forward layers as usual.
    # We want the network to predict a classification for the sequence,
    # so we'll use the number of classes as the number of output units.
    l_out = DenseLayer(
        l_forward_slice1, num_units=output_classes, nonlinearity=las.nonlinearities.softmax, name='output')

    return l_out
def create_model(s1_ae,
                 s2_ae,
                 s3_ae,
                 s1_shape,
                 s1_var,
                 s2_shape,
                 s2_var,
                 s3_shape,
                 s3_var,
                 mask_shape,
                 mask_var,
                 lstm_size=250,
                 lstm2_size=250,
                 win=T.iscalar('theta'),
                 output_classes=26,
                 fusiontype='concat',
                 w_init_fn=las.init.Orthogonal(),
                 use_peepholes=True):

    s1_bn_weights, s1_bn_biases, s1_bn_shapes, s1_bn_nonlinearities = s1_ae
    s2_weights, s2_biases, s2_shapes, s2_nonlinearities = s2_ae
    s3_weights, s3_biases, s3_shapes, s3_nonlinearities = s3_ae

    gate_parameters = Gate(W_in=w_init_fn,
                           W_hid=w_init_fn,
                           b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=w_init_fn,
        W_hid=w_init_fn,
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None,
        b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)

    l_s1 = InputLayer(s1_shape, s1_var, 's1_im')
    l_mask = InputLayer(mask_shape, mask_var, 'mask')
    l_s2 = InputLayer(s2_shape, s2_var, 's2_im')
    l_s3 = InputLayer(s3_shape, s3_var, 's3_im')

    symbolic_batchsize_s1 = l_s1.input_var.shape[0]
    symbolic_seqlen_s1 = l_s1.input_var.shape[1]
    symbolic_batchsize_s2 = l_s2.input_var.shape[0]
    symbolic_seqlen_s2 = l_s2.input_var.shape[1]
    symbolic_batchsize_s3 = l_s3.input_var.shape[0]
    symbolic_seqlen_s3 = l_s3.input_var.shape[1]

    l_reshape1_s1 = ReshapeLayer(l_s1, (-1, s1_shape[-1]), name='reshape1_s1')
    l_encoder_s1 = create_pretrained_encoder(
        l_reshape1_s1, s1_bn_weights, s1_bn_biases, s1_bn_shapes,
        s1_bn_nonlinearities, ['fc1_s1', 'fc2_s1', 'fc3_s1', 'bottleneck_s1'])
    s1_len = las.layers.get_output_shape(l_encoder_s1)[-1]

    l_reshape2_s1 = ReshapeLayer(
        l_encoder_s1, (symbolic_batchsize_s1, symbolic_seqlen_s1, s1_len),
        name='reshape2_s1')
    l_delta_s1 = DeltaLayer(l_reshape2_s1, win, name='delta_s1')
    l_delta_s1_dropout = DropoutLayer(l_delta_s1, name='dropout_s1')

    # s2 images
    l_reshape1_s2 = ReshapeLayer(l_s2, (-1, s2_shape[-1]), name='reshape1_s2')
    l_encoder_s2 = create_pretrained_encoder(
        l_reshape1_s2, s2_weights, s2_biases, s2_shapes, s2_nonlinearities,
        ['fc1_s2', 'fc2_s2', 'fc3_s2', 'bottleneck_s2'])
    s2_len = las.layers.get_output_shape(l_encoder_s2)[-1]
    l_reshape2_s2 = ReshapeLayer(
        l_encoder_s2, (symbolic_batchsize_s2, symbolic_seqlen_s2, s2_len),
        name='reshape2_s2')
    l_delta_s2 = DeltaLayer(l_reshape2_s2, win, name='delta_s2')
    l_delta_s2_dropout = DropoutLayer(l_delta_s2, name='dropout_s2')

    # s3 images
    l_reshape1_s3 = ReshapeLayer(l_s3, (-1, s3_shape[-1]), name='reshape1_s3')
    l_encoder_s3 = create_pretrained_encoder(
        l_reshape1_s3, s3_weights, s3_biases, s3_shapes, s3_nonlinearities,
        ['fc1_s3', 'fc2_s3', 'fc3_s3', 'bottleneck_s3'])
    s3_len = las.layers.get_output_shape(l_encoder_s3)[-1]
    l_reshape2_s3 = ReshapeLayer(
        l_encoder_s3, (symbolic_batchsize_s3, symbolic_seqlen_s3, s3_len),
        name='reshape2_s3')
    l_delta_s3 = DeltaLayer(l_reshape2_s3, win, name='delta_s3')
    l_delta_s3_dropout = DropoutLayer(l_delta_s3, name='dropout_s3')

    l_lstm_s1 = LSTMLayer(
        l_delta_s1_dropout,
        lstm_size * 2,
        peepholes=use_peepholes,
        # We need to specify a separate input for masks
        mask_input=l_mask,
        # Here, we supply the gate parameters for each gate
        ingate=gate_parameters,
        forgetgate=gate_parameters,
        cell=cell_parameters,
        outgate=gate_parameters,
        # We'll learn the initialization and use gradient clipping
        learn_init=True,
        grad_clipping=5.,
        name='lstm_s1')

    l_lstm_s2 = LSTMLayer(
        l_delta_s2_dropout,
        lstm_size * 2,
        peepholes=use_peepholes,
        # We need to specify a separate input for masks
        mask_input=l_mask,
        # Here, we supply the gate parameters for each gate
        ingate=gate_parameters,
        forgetgate=gate_parameters,
        cell=cell_parameters,
        outgate=gate_parameters,
        # We'll learn the initialization and use gradient clipping
        learn_init=True,
        grad_clipping=5.,
        name='lstm_s2')

    l_lstm_s3 = LSTMLayer(
        l_delta_s3_dropout,
        lstm_size * 2,
        peepholes=use_peepholes,
        # We need to specify a separate input for masks
        mask_input=l_mask,
        # Here, we supply the gate parameters for each gate
        ingate=gate_parameters,
        forgetgate=gate_parameters,
        cell=cell_parameters,
        outgate=gate_parameters,
        # We'll learn the initialization and use gradient clipping
        learn_init=True,
        grad_clipping=5.,
        name='lstm_s3')

    # Fuse the three per-stream LSTM outputs.
    # Merge layers take in lists of layers to merge as input.
    if fusiontype == 'adasum':
        l_fuse = AdaptiveElemwiseSumLayer([l_lstm_s1, l_lstm_s2, l_lstm_s3],
                                          name='adasum1')
    elif fusiontype == 'sum':
        l_fuse = ElemwiseSumLayer([l_lstm_s1, l_lstm_s2, l_lstm_s3],
                                  name='sum1')
    elif fusiontype == 'concat':
        l_fuse = ConcatLayer([l_lstm_s1, l_lstm_s2, l_lstm_s3],
                             axis=-1,
                             name='concat')

    l_fuse_dropout = DropoutLayer(l_fuse, name='concat_dropout')
    f_lstm_agg, b_lstm_agg = create_blstm(l_fuse_dropout, l_mask, lstm2_size,
                                          cell_parameters, gate_parameters,
                                          'lstm_agg')
    l_sum2 = ElemwiseSumLayer([f_lstm_agg, b_lstm_agg], name='sum2')

    # reshape to (num_examples * seq_len, lstm2_size)
    l_reshape3 = ReshapeLayer(l_sum2, (-1, lstm2_size), name='reshape3')

    # Now, we can apply feed-forward layers as usual.
    # We want the network to predict a classification for the sequence,
    # so we'll use the number of classes as the number of output units.
    l_softmax = DenseLayer(l_reshape3,
                           num_units=output_classes,
                           nonlinearity=las.nonlinearities.softmax,
                           name='softmax')

    l_out = ReshapeLayer(l_softmax, (-1, symbolic_seqlen_s1, output_classes),
                         name='output')

    return l_out, l_fuse
def create_model(ae,
                 diff_ae,
                 input_shape,
                 input_var,
                 mask_shape,
                 mask_var,
                 diff_shape,
                 diff_var,
                 lstm_size=250,
                 win=T.iscalar('theta'),
                 output_classes=26,
                 fusiontype='concat',
                 w_init_fn=las.init.Orthogonal(),
                 use_peepholes=True):

    bn_weights, bn_biases, bn_shapes, bn_nonlinearities = extract_weights(ae)
    diff_weights, diff_biases, diff_shapes, diff_nonlinearities = extract_weights(
        diff_ae)

    gate_parameters = Gate(W_in=w_init_fn,
                           W_hid=w_init_fn,
                           b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=w_init_fn,
        W_hid=w_init_fn,
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None,
        b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)

    l_raw = InputLayer(input_shape, input_var, 'raw_im')
    l_mask = InputLayer(mask_shape, mask_var, 'mask')
    l_diff = InputLayer(diff_shape, diff_var, 'diff_im')

    symbolic_batchsize_raw = l_raw.input_var.shape[0]
    symbolic_seqlen_raw = l_raw.input_var.shape[1]
    symbolic_batchsize_diff = l_diff.input_var.shape[0]
    symbolic_seqlen_diff = l_diff.input_var.shape[1]

    l_reshape1_raw = ReshapeLayer(l_raw, (-1, input_shape[-1]),
                                  name='reshape1_raw')
    l_encoder_raw = create_pretrained_encoder(
        l_reshape1_raw, bn_weights, bn_biases, bn_shapes, bn_nonlinearities,
        ['fc1_raw', 'fc2_raw', 'fc3_raw', 'bottleneck_raw'])
    raw_len = las.layers.get_output_shape(l_encoder_raw)[-1]

    l_reshape2_raw = ReshapeLayer(
        l_encoder_raw, (symbolic_batchsize_raw, symbolic_seqlen_raw, raw_len),
        name='reshape2_raw')
    l_delta_raw = DeltaLayer(l_reshape2_raw, win, name='delta_raw')

    # diff images
    l_reshape1_diff = ReshapeLayer(l_diff, (-1, diff_shape[-1]),
                                   name='reshape1_diff')
    l_encoder_diff = create_pretrained_encoder(
        l_reshape1_diff, diff_weights, diff_biases, diff_shapes,
        diff_nonlinearities,
        ['fc1_diff', 'fc2_diff', 'fc3_diff', 'bottleneck_diff'])
    diff_len = las.layers.get_output_shape(l_encoder_diff)[-1]
    l_reshape2_diff = ReshapeLayer(
        l_encoder_diff,
        (symbolic_batchsize_diff, symbolic_seqlen_diff, diff_len),
        name='reshape2_diff')
    l_delta_diff = DeltaLayer(l_reshape2_diff, win, name='delta_diff')

    l_lstm_raw = LSTMLayer(
        l_delta_raw,
        int(lstm_size),
        peepholes=use_peepholes,
        # We need to specify a separate input for masks
        mask_input=l_mask,
        # Here, we supply the gate parameters for each gate
        ingate=gate_parameters,
        forgetgate=gate_parameters,
        cell=cell_parameters,
        outgate=gate_parameters,
        # We'll learn the initialization and use gradient clipping
        learn_init=True,
        grad_clipping=5.,
        name='lstm_raw')

    l_lstm_diff = LSTMLayer(
        l_delta_diff,
        lstm_size,
        peepholes=use_peepholes,
        # We need to specify a separate input for masks
        mask_input=l_mask,
        # Here, we supply the gate parameters for each gate
        ingate=gate_parameters,
        forgetgate=gate_parameters,
        cell=cell_parameters,
        outgate=gate_parameters,
        # We'll learn the initialization and use gradient clipping
        learn_init=True,
        grad_clipping=5.,
        name='lstm_diff')

    # Fuse the raw-image and diff-image LSTM outputs.
    # Merge layers take in lists of layers to merge as input.
    if fusiontype == 'adasum':
        l_fuse = AdaptiveElemwiseSumLayer([l_lstm_raw, l_lstm_diff],
                                          name='adasum1')
    elif fusiontype == 'sum':
        l_fuse = ElemwiseSumLayer([l_lstm_raw, l_lstm_diff], name='sum1')
    elif fusiontype == 'concat':
        l_fuse = ConcatLayer([l_lstm_raw, l_lstm_diff], axis=-1, name='concat')

    # l_drop_agg = DropoutLayer(l_sum1, name='dropout_agg')

    f_lstm_agg, b_lstm_agg = create_blstm(l_fuse, l_mask, lstm_size,
                                          cell_parameters, gate_parameters,
                                          'lstm_agg')
    l_sum2 = ElemwiseSumLayer([f_lstm_agg, b_lstm_agg], name='sum2')
    '''
    l_lstm_agg = LSTMLayer(
        l_drop_agg, lstm_size * 2,
        # We need to specify a separate input for masks
        mask_input=l_mask,
        # Here, we supply the gate parameters for each gate
        ingate=gate_parameters, forgetgate=gate_parameters,
        cell=cell_parameters, outgate=gate_parameters,
        # We'll learn the initialization and use gradient clipping
        learn_init=True, grad_clipping=5., name='lstm_agg')

    # implement drop-out regularization
    l_dropout = DropoutLayer(l_sum1, p=0.4, name='dropout1')

    l_lstm2, l_lstm2_back = create_blstm(l_dropout, l_mask, lstm_size, cell_parameters, gate_parameters, 'lstm2')

    # We'll combine the forward and backward layer output by summing.
    # Merge layers take in lists of layers to merge as input.
    l_sum2 = ElemwiseSumLayer([l_lstm2, l_lstm2_back])
    '''

    # l_sum2 = ElemwiseSumLayer([f_lstm_agg, b_lstm_agg], name='sum2')

    l_forward_slice1 = SliceLayer(l_sum2, -1, 1, name='slice1')

    # Now, we can apply feed-forward layers as usual.
    # We want the network to predict a classification for the sequence,
    # so we'll use the number of classes as the number of output units.
    l_out = DenseLayer(l_forward_slice1,
                       num_units=output_classes,
                       nonlinearity=las.nonlinearities.softmax,
                       name='output')

    return l_out, l_fuse
def create_model(dbn, input_shape, input_var, mask_shape, mask_var,
                 dct_shape, dct_var, lstm_size=250, win=T.iscalar('theta'),
                 output_classes=26, fusiontype='sum', w_init_fn=las.init.Orthogonal(),
                 use_peepholes=True):

    # extract the pretrained encoder weights and biases from the first four dense layers
    dbn_layers = dbn.get_all_layers()
    shapes = [2000, 1000, 500, 50]
    nonlinearities = [rectify, rectify, rectify, linear]
    weights = [layer.W.astype('float32') for layer in dbn_layers[1:5]]
    biases = [layer.b.astype('float32') for layer in dbn_layers[1:5]]

    gate_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn,
        b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn,
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None, b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)

    l_in = InputLayer(input_shape, input_var, 'input')
    l_mask = InputLayer(mask_shape, mask_var, 'mask')
    l_dct = InputLayer(dct_shape, dct_var, 'dct')

    symbolic_batchsize = l_in.input_var.shape[0]
    symbolic_seqlen = l_in.input_var.shape[1]

    l_reshape1 = ReshapeLayer(l_in, (-1, input_shape[-1]), name='reshape1')
    l_encoder = create_pretrained_encoder(l_reshape1, weights, biases, shapes, nonlinearities,
                                          ['fc1', 'fc2', 'fc3', 'bottleneck'])
    encoder_len = las.layers.get_output_shape(l_encoder)[-1]
    l_reshape2 = ReshapeLayer(l_encoder, (symbolic_batchsize, symbolic_seqlen, encoder_len), name='reshape2')
    l_delta = DeltaLayer(l_reshape2, win, name='delta')

    l_lstm_bn = LSTMLayer(
        l_delta, lstm_size, peepholes=use_peepholes,
        # We need to specify a separate input for masks
        mask_input=l_mask,
        # Here, we supply the gate parameters for each gate
        ingate=gate_parameters, forgetgate=gate_parameters,
        cell=cell_parameters, outgate=gate_parameters,
        # We'll learn the initialization and use gradient clipping
        learn_init=True, grad_clipping=5., name='lstm_bn')

    l_lstm_dct = LSTMLayer(
        l_dct, lstm_size, peepholes=use_peepholes,
        # We need to specify a separate input for masks
        mask_input=l_mask,
        # Here, we supply the gate parameters for each gate
        ingate=gate_parameters, forgetgate=gate_parameters,
        cell=cell_parameters, outgate=gate_parameters,
        # We'll learn the initialization and use gradient clipping
        learn_init=True, grad_clipping=5., name='lstm_dct')

    # Fuse the bottleneck-feature and DCT LSTM outputs.
    # Merge layers take in lists of layers to merge as input.

    if fusiontype == 'sum':
        l_fuse = ElemwiseSumLayer([l_lstm_bn, l_lstm_dct], name='sum1')
    elif fusiontype == 'adasum':
        l_fuse = AdaptiveElemwiseSumLayer([l_lstm_bn, l_lstm_dct], name='adasum')
    elif fusiontype == 'concat':
        l_fuse = ConcatLayer([l_lstm_bn, l_lstm_dct], axis=2, name='concat')

    f_lstm_agg = create_lstm(l_fuse, l_mask, lstm_size, cell_parameters, gate_parameters, 'lstm_agg')

    # reshape to (num_examples * seq_len, lstm_size)
    l_reshape3 = ReshapeLayer(f_lstm_agg, (-1, lstm_size))

    # l_forward_slice1 = SliceLayer(l_sum2, -1, 1, name='slice1')

    # Now, we can apply feed-forward layers as usual.
    # We want the network to predict a classification for the sequence,
    # so we'll use the number of classes as the number of output units.
    l_softmax = DenseLayer(
        l_reshape3, num_units=output_classes, nonlinearity=las.nonlinearities.softmax, name='softmax')

    l_out = ReshapeLayer(l_softmax, (-1, symbolic_seqlen, output_classes), name='output')

    return l_out, l_fuse
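# A hedged sketch of how a training function could be wired to the layers
# returned above; target_var, the flattening scheme and the adadelta choice are
# assumptions for illustration, not part of the original snippet.
#
# network, l_fuse = create_model(dbn, (None, None, 1200), input_var,
#                                (None, None), mask_var,
#                                (None, None, 90), dct_var)
# target_var = T.imatrix('targets')                                # (batch, seq_len)
# prediction = las.layers.get_output(network)                      # (batch, seq_len, classes)
# prediction_flat = T.reshape(prediction, (-1, 26))
# loss = las.objectives.categorical_crossentropy(prediction_flat, target_var.flatten())
# loss = las.objectives.aggregate(loss, mask_var.flatten(), mode='normalized_sum')
# params = las.layers.get_all_params(network, trainable=True)
# updates = las.updates.adadelta(loss, params)
# train_fn = theano.function([input_var, dct_var, mask_var, target_var],
#                            loss, updates=updates)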
def create_model(ae, diff_ae, input_shape, input_var, mask_shape, mask_var,
                 diff_shape, diff_var, lstm_size=250, win=T.iscalar('theta'),
                 output_classes=26, fusiontype='concat', w_init_fn=las.init.Orthogonal(),
                 use_peepholes=True):

    bn_weights, bn_biases, bn_shapes, bn_nonlinearities = extract_weights(ae)
    diff_weights, diff_biases, diff_shapes, diff_nonlinearities = extract_weights(diff_ae)

    gate_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn,
        b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn,
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None, b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)

    l_raw = InputLayer(input_shape, input_var, 'raw_im')
    l_mask = InputLayer(mask_shape, mask_var, 'mask')
    l_diff = InputLayer(diff_shape, diff_var, 'diff_im')

    symbolic_batchsize_raw = l_raw.input_var.shape[0]
    symbolic_seqlen_raw = l_raw.input_var.shape[1]
    symbolic_batchsize_diff = l_diff.input_var.shape[0]
    symbolic_seqlen_diff = l_diff.input_var.shape[1]

    l_reshape1_raw = ReshapeLayer(l_raw, (-1, input_shape[-1]), name='reshape1_raw')
    l_encoder_raw = create_pretrained_encoder(l_reshape1_raw, bn_weights, bn_biases, bn_shapes, bn_nonlinearities,
                                              ['fc1_raw', 'fc2_raw', 'fc3_raw', 'bottleneck_raw'])
    raw_len = las.layers.get_output_shape(l_encoder_raw)[-1]

    l_reshape2_raw = ReshapeLayer(l_encoder_raw,
                                  (symbolic_batchsize_raw, symbolic_seqlen_raw, raw_len),
                                  name='reshape2_raw')
    l_delta_raw = DeltaLayer(l_reshape2_raw, win, name='delta_raw')

    # diff images
    l_reshape1_diff = ReshapeLayer(l_diff, (-1, diff_shape[-1]), name='reshape1_diff')
    l_encoder_diff = create_pretrained_encoder(l_reshape1_diff, diff_weights, diff_biases, diff_shapes,
                                               diff_nonlinearities,
                                               ['fc1_diff', 'fc2_diff', 'fc3_diff', 'bottleneck_diff'])
    diff_len = las.layers.get_output_shape(l_encoder_diff)[-1]
    l_reshape2_diff = ReshapeLayer(l_encoder_diff,
                                   (symbolic_batchsize_diff, symbolic_seqlen_diff, diff_len),
                                   name='reshape2_diff')
    l_delta_diff = DeltaLayer(l_reshape2_diff, win, name='delta_diff')

    l_lstm_raw = LSTMLayer(
        l_delta_raw, int(lstm_size), peepholes=use_peepholes,
        # We need to specify a separate input for masks
        mask_input=l_mask,
        # Here, we supply the gate parameters for each gate
        ingate=gate_parameters, forgetgate=gate_parameters,
        cell=cell_parameters, outgate=gate_parameters,
        # We'll learn the initialization and use gradient clipping
        learn_init=True, grad_clipping=5., name='lstm_raw')

    l_lstm_diff = LSTMLayer(
        l_delta_diff, lstm_size, peepholes=use_peepholes,
        # We need to specify a separate input for masks
        mask_input=l_mask,
        # Here, we supply the gate parameters for each gate
        ingate=gate_parameters, forgetgate=gate_parameters,
        cell=cell_parameters, outgate=gate_parameters,
        # We'll learn the initialization and use gradient clipping
        learn_init=True, grad_clipping=5., name='lstm_diff')

    # Fuse the raw-image and diff-image LSTM outputs.
    # Merge layers take in lists of layers to merge as input.
    if fusiontype == 'adasum':
        l_fuse = AdaptiveElemwiseSumLayer([l_lstm_raw, l_lstm_diff], name='adasum1')
    elif fusiontype == 'sum':
        l_fuse = ElemwiseSumLayer([l_lstm_raw, l_lstm_diff], name='sum1')
    elif fusiontype == 'concat':
        l_fuse = ConcatLayer([l_lstm_raw, l_lstm_diff], axis=-1, name='concat')

    # l_drop_agg = DropoutLayer(l_sum1, name='dropout_agg')

    f_lstm_agg, b_lstm_agg = create_blstm(l_fuse, l_mask, lstm_size, cell_parameters, gate_parameters, 'lstm_agg')
    l_sum2 = ElemwiseSumLayer([f_lstm_agg, b_lstm_agg], name='sum2')


    '''
    l_lstm_agg = LSTMLayer(
        l_drop_agg, lstm_size * 2,
        # We need to specify a separate input for masks
        mask_input=l_mask,
        # Here, we supply the gate parameters for each gate
        ingate=gate_parameters, forgetgate=gate_parameters,
        cell=cell_parameters, outgate=gate_parameters,
        # We'll learn the initialization and use gradient clipping
        learn_init=True, grad_clipping=5., name='lstm_agg')

    # implement drop-out regularization
    l_dropout = DropoutLayer(l_sum1, p=0.4, name='dropout1')

    l_lstm2, l_lstm2_back = create_blstm(l_dropout, l_mask, lstm_size, cell_parameters, gate_parameters, 'lstm2')

    # We'll combine the forward and backward layer output by summing.
    # Merge layers take in lists of layers to merge as input.
    l_sum2 = ElemwiseSumLayer([l_lstm2, l_lstm2_back])
    '''

    # l_sum2 = ElemwiseSumLayer([f_lstm_agg, b_lstm_agg], name='sum2')

    l_forward_slice1 = SliceLayer(l_sum2, -1, 1, name='slice1')

    # Now, we can apply feed-forward layers as usual.
    # We want the network to predict a classification for the sequence,
    # so we'll use the number of classes as the number of output units.
    l_out = DenseLayer(
        l_forward_slice1, num_units=output_classes,
        nonlinearity=las.nonlinearities.softmax, name='output')

    return l_out, l_fuse
def create_model(dbn,
                 input_shape,
                 input_var,
                 mask_shape,
                 mask_var,
                 dct_shape,
                 dct_var,
                 lstm_size=250,
                 win=T.iscalar('theta'),
                 output_classes=26,
                 fusiontype='sum',
                 w_init_fn=las.init.GlorotUniform(),
                 use_peepholes=False,
                 nonlinearities=rectify):

    # note: the dbn tuple supplies its own nonlinearities, overriding the keyword argument
    weights, biases, shapes, nonlinearities = dbn
    names = ['fc1', 'fc2', 'fc3', 'bottleneck']

    gate_parameters = Gate(W_in=w_init_fn,
                           W_hid=w_init_fn,
                           b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=w_init_fn,
        W_hid=w_init_fn,
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None,
        b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)

    l_in = InputLayer(input_shape, input_var, 'input')
    l_mask = InputLayer(mask_shape, mask_var, 'mask')
    l_dct = InputLayer(dct_shape, dct_var, 'dct')

    symbolic_batchsize = l_in.input_var.shape[0]
    symbolic_seqlen = l_in.input_var.shape[1]

    l_reshape1 = ReshapeLayer(l_in, (-1, input_shape[-1]), name='reshape1')
    l_encoder = create_pretrained_encoder(l_reshape1, weights, biases, shapes,
                                          nonlinearities, names)
    encoder_len = las.layers.get_output_shape(l_encoder)[-1]
    l_reshape2 = ReshapeLayer(
        l_encoder, (symbolic_batchsize, symbolic_seqlen, encoder_len),
        name='reshape2')
    l_delta = DeltaLayer(l_reshape2, win, name='delta')

    l_delta_dct = DeltaLayer(l_dct, win, name='delta_dct')

    l_lstm_bn = LSTMLayer(
        l_delta,
        lstm_size,
        peepholes=use_peepholes,
        # We need to specify a separate input for masks
        mask_input=l_mask,
        # Here, we supply the gate parameters for each gate
        ingate=gate_parameters,
        forgetgate=gate_parameters,
        cell=cell_parameters,
        outgate=gate_parameters,
        # We'll learn the initialization and use gradient clipping
        learn_init=True,
        grad_clipping=5.,
        name='lstm_bn')

    l_lstm_dct = LSTMLayer(
        l_delta_dct,
        lstm_size,
        peepholes=use_peepholes,
        # We need to specify a separate input for masks
        mask_input=l_mask,
        # Here, we supply the gate parameters for each gate
        ingate=gate_parameters,
        forgetgate=gate_parameters,
        cell=cell_parameters,
        outgate=gate_parameters,
        # We'll learn the initialization and use gradient clipping
        learn_init=True,
        grad_clipping=5.,
        name='lstm_dct')

    # Fuse the bottleneck-feature and DCT LSTM outputs.
    # Merge layers take in lists of layers to merge as input.

    if fusiontype == 'sum':
        l_fuse = ElemwiseSumLayer([l_lstm_bn, l_lstm_dct], name='sum1')
    elif fusiontype == 'adasum':
        l_fuse = AdaptiveElemwiseSumLayer([l_lstm_bn, l_lstm_dct],
                                          name='adasum')
    elif fusiontype == 'concat':
        l_fuse = ConcatLayer([l_lstm_bn, l_lstm_dct], axis=2, name='concat')
    else:
        raise ValueError('Unsupported fusion type: {}'.format(fusiontype))

    f_lstm_agg, b_lstm_agg = create_blstm(l_fuse, l_mask, lstm_size,
                                          cell_parameters, gate_parameters,
                                          'lstm_agg')

    l_sum2 = ElemwiseSumLayer([f_lstm_agg, b_lstm_agg], name='sum2')

    # reshape to (num_examples * seq_len, lstm_size)
    l_reshape3 = ReshapeLayer(l_sum2, (-1, lstm_size), name='reshape3')

    # l_forward_slice1 = SliceLayer(l_sum2, -1, 1, name='slice1')

    # Now, we can apply feed-forward layers as usual.
    # We want the network to predict a classification for the sequence,
    # so we'll use the number of classes as the number of output units.
    l_softmax = DenseLayer(l_reshape3,
                           num_units=output_classes,
                           nonlinearity=las.nonlinearities.softmax,
                           name='softmax')

    l_out = ReshapeLayer(l_softmax, (-1, symbolic_seqlen, output_classes),
                         name='output')

    return l_out, l_fuse
def create_model(s1_ae, s2_ae, s3_ae, s1_shape, s1_var,
                 s2_shape, s2_var, s3_shape, s3_var,
                 mask_shape, mask_var,
                 lstm_size=250, win=T.iscalar('theta'),
                 output_classes=26, fusiontype='concat', w_init_fn=las.init.Orthogonal(),
                 use_peepholes=True):

    s1_bn_weights, s1_bn_biases, s1_bn_shapes, s1_bn_nonlinearities = s1_ae
    s2_weights, s2_biases, s2_shapes, s2_nonlinearities = s2_ae
    s3_weights, s3_biases, s3_shapes, s3_nonlinearities = s3_ae

    gate_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn,
        b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn,
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None, b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)

    l_s1 = InputLayer(s1_shape, s1_var, 's1_im')
    l_mask = InputLayer(mask_shape, mask_var, 'mask')
    l_s2 = InputLayer(s2_shape, s2_var, 's2_im')
    l_s3 = InputLayer(s3_shape, s3_var, 's3_im')

    symbolic_batchsize_s1 = l_s1.input_var.shape[0]
    symbolic_seqlen_s1 = l_s1.input_var.shape[1]
    symbolic_batchsize_s2 = l_s2.input_var.shape[0]
    symbolic_seqlen_s2 = l_s2.input_var.shape[1]
    symbolic_batchsize_s3 = l_s3.input_var.shape[0]
    symbolic_seqlen_s3 = l_s3.input_var.shape[1]

    l_reshape1_s1 = ReshapeLayer(l_s1, (-1, s1_shape[-1]), name='reshape1_s1')
    l_encoder_s1 = create_pretrained_encoder(l_reshape1_s1, s1_bn_weights, s1_bn_biases, s1_bn_shapes, s1_bn_nonlinearities,
                                              ['fc1_s1', 'fc2_s1', 'fc3_s1', 'bottleneck_s1'])
    s1_len = las.layers.get_output_shape(l_encoder_s1)[-1]

    l_reshape2_s1 = ReshapeLayer(l_encoder_s1,
                                 (symbolic_batchsize_s1, symbolic_seqlen_s1, s1_len),
                                 name='reshape2_s1')
    l_delta_s1 = DeltaLayer(l_reshape2_s1, win, name='delta_s1')

    # s2 images
    l_reshape1_s2 = ReshapeLayer(l_s2, (-1, s2_shape[-1]), name='reshape1_s2')
    l_encoder_s2 = create_pretrained_encoder(l_reshape1_s2, s2_weights, s2_biases, s2_shapes,
                                             s2_nonlinearities,
                                             ['fc1_s2', 'fc2_s2', 'fc3_s2', 'bottleneck_s2'])
    s2_len = las.layers.get_output_shape(l_encoder_s2)[-1]
    l_reshape2_s2 = ReshapeLayer(l_encoder_s2,
                                 (symbolic_batchsize_s2, symbolic_seqlen_s2, s2_len),
                                 name='reshape2_s2')
    l_delta_s2 = DeltaLayer(l_reshape2_s2, win, name='delta_s2')

    # s3 images
    l_reshape1_s3 = ReshapeLayer(l_s3, (-1, s3_shape[-1]), name='reshape1_s3')
    l_encoder_s3 = create_pretrained_encoder(l_reshape1_s3, s3_weights, s3_biases, s3_shapes,
                                             s3_nonlinearities,
                                             ['fc1_s3', 'fc2_s3', 'fc3_s3', 'bottleneck_s3'])
    s3_len = las.layers.get_output_shape(l_encoder_s3)[-1]
    l_reshape2_s3 = ReshapeLayer(l_encoder_s3,
                                 (symbolic_batchsize_s3, symbolic_seqlen_s3, s3_len),
                                 name='reshape2_s3')
    l_delta_s3 = DeltaLayer(l_reshape2_s3, win, name='delta_s3')

    l_lstm_s1 = LSTMLayer(
        l_delta_s1, int(lstm_size), peepholes=use_peepholes,
        # We need to specify a separate input for masks
        mask_input=l_mask,
        # Here, we supply the gate parameters for each gate
        ingate=gate_parameters, forgetgate=gate_parameters,
        cell=cell_parameters, outgate=gate_parameters,
        # We'll learn the initialization and use gradient clipping
        learn_init=True, grad_clipping=5., name='lstm_s1')

    l_lstm_s2 = LSTMLayer(
        l_delta_s2, lstm_size, peepholes=use_peepholes,
        # We need to specify a separate input for masks
        mask_input=l_mask,
        # Here, we supply the gate parameters for each gate
        ingate=gate_parameters, forgetgate=gate_parameters,
        cell=cell_parameters, outgate=gate_parameters,
        # We'll learn the initialization and use gradient clipping
        learn_init=True, grad_clipping=5., name='lstm_s2')

    l_lstm_s3 = LSTMLayer(
        l_delta_s3, lstm_size, peepholes=use_peepholes,
        # We need to specify a separate input for masks
        mask_input=l_mask,
        # Here, we supply the gate parameters for each gate
        ingate=gate_parameters, forgetgate=gate_parameters,
        cell=cell_parameters, outgate=gate_parameters,
        # We'll learn the initialization and use gradient clipping
        learn_init=True, grad_clipping=5., name='lstm_s3')

    # Fuse the three per-stream LSTM outputs.
    # Merge layers take in lists of layers to merge as input.
    if fusiontype == 'adasum':
        l_fuse = AdaptiveElemwiseSumLayer([l_lstm_s1, l_lstm_s2, l_lstm_s3], name='adasum1')
    elif fusiontype == 'sum':
        l_fuse = ElemwiseSumLayer([l_lstm_s1, l_lstm_s2, l_lstm_s3], name='sum1')
    elif fusiontype == 'concat':
        l_fuse = ConcatLayer([l_lstm_s1, l_lstm_s2, l_lstm_s3], axis=-1, name='concat')

    f_lstm_agg, b_lstm_agg = create_blstm(l_fuse, l_mask, lstm_size, cell_parameters, gate_parameters, 'lstm_agg')
    l_sum2 = ElemwiseSumLayer([f_lstm_agg, b_lstm_agg], name='sum2')

    # reshape to (num_examples * seq_len, lstm_size)
    l_reshape3 = ReshapeLayer(l_sum2, (-1, lstm_size), name='reshape3')

    # Now, we can apply feed-forward layers as usual.
    # We want the network to predict a classification for the sequence,
    # so we'll use the number of classes as the number of output units.
    l_softmax = DenseLayer(
        l_reshape3, num_units=output_classes,
        nonlinearity=las.nonlinearities.softmax, name='softmax')

    l_out = ReshapeLayer(l_softmax, (-1, symbolic_seqlen_s1, output_classes), name='output')

    return l_out, l_fuse
def create_pretrained_model(s1_ae, s1_lstm,
                            s2_ae, s2_lstm,
                            s3_ae, s3_lstm,
                            s4_ae, s4_lstm,
                            s5_ae, s5_lstm,
                            s1_shape, s1_var,
                            s2_shape, s2_var,
                            s3_shape, s3_var,
                            s4_shape, s4_var,
                            s5_shape, s5_var,
                            mask_shape, mask_var,
                            lstm_size=250, win=T.iscalar('theta'),
                            output_classes=26, fusiontype='concat', w_init_fn=las.init.Orthogonal(),
                            use_peepholes=True, use_blstm_substream=False):
    s1_bn_weights, s1_bn_biases, s1_bn_shapes, s1_bn_nonlinearities = s1_ae
    s2_weights, s2_biases, s2_shapes, s2_nonlinearities = s2_ae
    s3_weights, s3_biases, s3_shapes, s3_nonlinearities = s3_ae
    s4_weights, s4_biases, s4_shapes, s4_nonlinearities = s4_ae
    s5_weights, s5_biases, s5_shapes, s5_nonlinearities = s5_ae

    gate_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn,
        b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn,
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None, b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)

    l_s1 = InputLayer(s1_shape, s1_var, 's1_im')
    l_mask = InputLayer(mask_shape, mask_var, 'mask')
    l_s2 = InputLayer(s2_shape, s2_var, 's2_im')
    l_s3 = InputLayer(s3_shape, s3_var, 's3_im')
    l_s4 = InputLayer(s4_shape, s4_var, 's4_im')
    l_s5 = InputLayer(s5_shape, s5_var, 's5_im')

    symbolic_batchsize_s1 = l_s1.input_var.shape[0]
    symbolic_seqlen_s1 = l_s1.input_var.shape[1]
    symbolic_batchsize_s2 = l_s2.input_var.shape[0]
    symbolic_seqlen_s2 = l_s2.input_var.shape[1]
    symbolic_batchsize_s3 = l_s3.input_var.shape[0]
    symbolic_seqlen_s3 = l_s3.input_var.shape[1]
    symbolic_batchsize_s4 = l_s4.input_var.shape[0]
    symbolic_seqlen_s4 = l_s4.input_var.shape[1]
    symbolic_batchsize_s5 = l_s5.input_var.shape[0]
    symbolic_seqlen_s5 = l_s5.input_var.shape[1]

    l_reshape1_s1 = ReshapeLayer(l_s1, (-1, s1_shape[-1]), name='reshape1_s1')
    l_encoder_s1 = create_pretrained_encoder(l_reshape1_s1, s1_bn_weights, s1_bn_biases, s1_bn_shapes,
                                             s1_bn_nonlinearities,
                                             ['fc1_s1', 'fc2_s1', 'fc3_s1', 'bottleneck_s1'])
    s1_len = las.layers.get_output_shape(l_encoder_s1)[-1]

    l_reshape2_s1 = ReshapeLayer(l_encoder_s1,
                                 (symbolic_batchsize_s1, symbolic_seqlen_s1, s1_len),
                                 name='reshape2_s1')
    l_delta_s1 = DeltaLayer(l_reshape2_s1, win, name='delta_s1')

    # s2 images
    l_reshape1_s2 = ReshapeLayer(l_s2, (-1, s2_shape[-1]), name='reshape1_s2')
    l_encoder_s2 = create_pretrained_encoder(l_reshape1_s2, s2_weights, s2_biases, s2_shapes,
                                             s2_nonlinearities,
                                             ['fc1_s2', 'fc2_s2', 'fc3_s2', 'bottleneck_s2'])
    s2_len = las.layers.get_output_shape(l_encoder_s2)[-1]
    l_reshape2_s2 = ReshapeLayer(l_encoder_s2,
                                 (symbolic_batchsize_s2, symbolic_seqlen_s2, s2_len),
                                 name='reshape2_s2')
    l_delta_s2 = DeltaLayer(l_reshape2_s2, win, name='delta_s2')

    # s3 images
    l_reshape1_s3 = ReshapeLayer(l_s3, (-1, s3_shape[-1]), name='reshape1_s3')
    l_encoder_s3 = create_pretrained_encoder(l_reshape1_s3, s3_weights, s3_biases, s3_shapes,
                                             s3_nonlinearities,
                                             ['fc1_s3', 'fc2_s3', 'fc3_s3', 'bottleneck_s3'])
    s3_len = las.layers.get_output_shape(l_encoder_s3)[-1]
    l_reshape2_s3 = ReshapeLayer(l_encoder_s3,
                                 (symbolic_batchsize_s3, symbolic_seqlen_s3, s3_len),
                                 name='reshape2_s3')
    l_delta_s3 = DeltaLayer(l_reshape2_s3, win, name='delta_s3')

    # s4 images
    l_reshape1_s4 = ReshapeLayer(l_s4, (-1, s4_shape[-1]), name='reshape1_s4')
    l_encoder_s4 = create_pretrained_encoder(l_reshape1_s4, s4_weights, s4_biases, s4_shapes,
                                             s4_nonlinearities,
                                             ['fc1_s4', 'fc2_s4', 'fc3_s4', 'bottleneck_s4'])
    s4_len = las.layers.get_output_shape(l_encoder_s4)[-1]
    l_reshape2_s4 = ReshapeLayer(l_encoder_s4,
                                 (symbolic_batchsize_s4, symbolic_seqlen_s4, s4_len),
                                 name='reshape2_s4')
    l_delta_s4 = DeltaLayer(l_reshape2_s4, win, name='delta_s4')

    # s5 images
    l_reshape1_s5 = ReshapeLayer(l_s5, (-1, s5_shape[-1]), name='reshape1_s5')
    l_encoder_s5 = create_pretrained_encoder(l_reshape1_s5, s5_weights, s5_biases, s5_shapes,
                                             s5_nonlinearities,
                                             ['fc1_s5', 'fc2_s5', 'fc3_s5', 'bottleneck_s5'])
    s5_len = las.layers.get_output_shape(l_encoder_s5)[-1]
    l_reshape2_s5 = ReshapeLayer(l_encoder_s5,
                                 (symbolic_batchsize_s5, symbolic_seqlen_s5, s5_len),
                                 name='reshape2_s5')
    l_delta_s5 = DeltaLayer(l_reshape2_s5, win, name='delta_s5')

    if not use_blstm_substream:
        l_lstm_s1 = create_pretrained_lstm(s1_lstm, 'f_lstm', l_delta_s1,
                                           l_mask, lstm_size, cell_parameters, gate_parameters,
                                           'f_lstm_s1', use_peepholes)

        l_lstm_s2 = create_pretrained_lstm(s2_lstm, 'f_lstm', l_delta_s2,
                                           l_mask, lstm_size, cell_parameters, gate_parameters,
                                           'f_lstm_s2', use_peepholes)

        l_lstm_s3 = create_pretrained_lstm(s3_lstm, 'f_lstm', l_delta_s3,
                                           l_mask, lstm_size, cell_parameters, gate_parameters,
                                           'f_lstm_s3', use_peepholes)
        l_lstm_s4 = create_pretrained_lstm(s4_lstm, 'f_lstm', l_delta_s4,
                                           l_mask, lstm_size, cell_parameters, gate_parameters,
                                           'f_lstm_s4', use_peepholes)
        l_lstm_s5 = create_pretrained_lstm(s5_lstm, 'f_lstm', l_delta_s5,
                                           l_mask, lstm_size, cell_parameters, gate_parameters,
                                           'f_lstm_s5', use_peepholes)

    else:
        f_lstm_s1 = create_pretrained_lstm(s1_lstm, 'f_lstm', l_delta_s1,
                                           l_mask, lstm_size, cell_parameters, gate_parameters,
                                           'f_lstm_s1', use_peepholes)
        b_lstm_s1 = create_pretrained_lstm(s1_lstm, 'b_lstm', l_delta_s1,
                                           l_mask, lstm_size, cell_parameters, gate_parameters,
                                           'b_lstm_s1', use_peepholes, backwards=True)
        l_lstm_s1 = ElemwiseSumLayer([f_lstm_s1, b_lstm_s1], name='sum_b_lstm_s1')

        f_lstm_s2 = create_pretrained_lstm(s2_lstm, 'f_lstm', l_delta_s2,
                                           l_mask, lstm_size, cell_parameters, gate_parameters,
                                           'f_lstm_s2', use_peepholes)
        b_lstm_s2 = create_pretrained_lstm(s2_lstm, 'b_lstm', l_delta_s2,
                                           l_mask, lstm_size, cell_parameters, gate_parameters,
                                           'b_lstm_s2', use_peepholes, backwards=True)
        l_lstm_s2 = ElemwiseSumLayer([f_lstm_s2, b_lstm_s2], name='sum_b_lstm_s2')

        f_lstm_s3 = create_pretrained_lstm(s3_lstm, 'f_lstm', l_delta_s3,
                                           l_mask, lstm_size, cell_parameters, gate_parameters,
                                           'f_lstm_s3', use_peepholes)
        b_lstm_s3 = create_pretrained_lstm(s3_lstm, 'b_lstm', l_delta_s3,
                                           l_mask, lstm_size, cell_parameters, gate_parameters,
                                           'b_lstm_s3', use_peepholes, backwards=True)
        l_lstm_s3 = ElemwiseSumLayer([f_lstm_s3, b_lstm_s3], name='sum_b_lstm_s3')

        f_lstm_s4 = create_pretrained_lstm(s4_lstm, 'f_lstm', l_delta_s4,
                                           l_mask, lstm_size, cell_parameters, gate_parameters,
                                           'f_lstm_s4', use_peepholes)
        b_lstm_s4 = create_pretrained_lstm(s4_lstm, 'b_lstm', l_delta_s4,
                                           l_mask, lstm_size, cell_parameters, gate_parameters,
                                           'b_lstm_s4', use_peepholes, backwards=True)
        l_lstm_s4 = ElemwiseSumLayer([f_lstm_s4, b_lstm_s4], name='sum_b_lstm_s4')

        f_lstm_s5 = create_pretrained_lstm(s5_lstm, 'f_lstm', l_delta_s5,
                                           l_mask, lstm_size, cell_parameters, gate_parameters,
                                           'f_lstm_s5', use_peepholes)
        b_lstm_s5 = create_pretrained_lstm(s5_lstm, 'b_lstm', l_delta_s5,
                                           l_mask, lstm_size, cell_parameters, gate_parameters,
                                           'b_lstm_s5', use_peepholes, backwards=True)
        l_lstm_s5 = ElemwiseSumLayer([f_lstm_s5, b_lstm_s5], name='sum_b_lstm_s5')


    # Fuse the five per-stream LSTM outputs.
    # Merge layers take in lists of layers to merge as input.
    if fusiontype == 'adasum':
        l_fuse = AdaptiveElemwiseSumLayer([l_lstm_s1, l_lstm_s2, l_lstm_s3, l_lstm_s4, l_lstm_s5],
                                          name='adasum1')
    elif fusiontype == 'sum':
        l_fuse = ElemwiseSumLayer([l_lstm_s1, l_lstm_s2, l_lstm_s3, l_lstm_s4, l_lstm_s5], name='sum1')
    elif fusiontype == 'concat':
        l_fuse = ConcatLayer([l_lstm_s1, l_lstm_s2, l_lstm_s3, l_lstm_s4, l_lstm_s5], axis=-1, name='concat')

    f_lstm_agg, b_lstm_agg = create_blstm(l_fuse, l_mask, lstm_size, cell_parameters, gate_parameters, 'lstm_agg')
    l_sum2 = ElemwiseSumLayer([f_lstm_agg, b_lstm_agg], name='sum2')

    # reshape to (num_examples * seq_len, lstm_size)
    l_reshape3 = ReshapeLayer(l_sum2, (-1, lstm_size), name='reshape3')

    # Now, we can apply feed-forward layers as usual.
    # We want the network to predict a classification for the sequence,
    # so we'll use the number of classes as the number of output units.
    l_softmax = DenseLayer(
        l_reshape3, num_units=output_classes,
        nonlinearity=las.nonlinearities.softmax, name='softmax')

    l_out = ReshapeLayer(l_softmax, (-1, symbolic_seqlen_s1, output_classes), name='output')

    return l_out, l_fuse
def create_pretrained_substream(weights,
                                biases,
                                input_shape,
                                input_var,
                                mask_shape,
                                mask_var,
                                name,
                                lstm_size=250,
                                win=T.iscalar('theta'),
                                nonlinearity=rectify,
                                w_init_fn=las.init.Orthogonal(),
                                use_peepholes=True):
    gate_parameters = Gate(W_in=w_init_fn,
                           W_hid=w_init_fn,
                           b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=w_init_fn,
        W_hid=w_init_fn,
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None,
        b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)

    l_input = InputLayer(input_shape, input_var, 'input_' + name)
    l_mask = InputLayer(mask_shape, mask_var, 'mask')

    symbolic_batchsize_raw = l_input.input_var.shape[0]
    symbolic_seqlen_raw = l_input.input_var.shape[1]

    l_reshape1_raw = ReshapeLayer(l_input, (-1, input_shape[-1]),
                                  name='reshape1_' + name)
    l_encoder_raw = create_pretrained_encoder(
        l_reshape1_raw, weights, biases, [2000, 1000, 500, 50],
        [nonlinearity, nonlinearity, nonlinearity, linear],
        ['fc1_' + name, 'fc2_' + name, 'fc3_' + name, 'bottleneck_' + name])
    input_len = las.layers.get_output_shape(l_encoder_raw)[-1]

    l_reshape2 = ReshapeLayer(
        l_encoder_raw,
        (symbolic_batchsize_raw, symbolic_seqlen_raw, input_len),
        name='reshape2_' + name)
    l_delta = DeltaLayer(l_reshape2, win, name='delta_' + name)

    l_lstm = LSTMLayer(
        l_delta,
        int(lstm_size),
        peepholes=use_peepholes,
        # We need to specify a separate input for masks
        mask_input=l_mask,
        # Here, we supply the gate parameters for each gate
        ingate=gate_parameters,
        forgetgate=gate_parameters,
        cell=cell_parameters,
        outgate=gate_parameters,
        # We'll learn the initialization and use gradient clipping
        learn_init=True,
        grad_clipping=5.,
        name='lstm_' + name)

    return l_lstm
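# A hedged usage sketch for create_pretrained_substream: build one LSTM substream
# per input modality and fuse them, mirroring the fusion models above. The
# variable names (s1_weights, s1_var, mask_var, window, ...) and the feature
# dimension 1200 are assumptions for illustration.
#
# l_lstm_s1 = create_pretrained_substream(s1_weights, s1_biases,
#                                         (None, None, 1200), s1_var,
#                                         (None, None), mask_var, 's1',
#                                         lstm_size=250, win=window)
# l_lstm_s2 = create_pretrained_substream(s2_weights, s2_biases,
#                                         (None, None, 1200), s2_var,
#                                         (None, None), mask_var, 's2',
#                                         lstm_size=250, win=window)
# l_fuse = ConcatLayer([l_lstm_s1, l_lstm_s2], axis=-1, name='concat')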