Exemplo n.º 1
0
def _lstm(X, settings, parameters, memory):
  """Run one step of a convolutional LSTM cell.

  Args:
    X: input for the current step; passed to `_lstm_convolution`.
    settings: mapping from which only `settings['n_filters']` is read. Four
      times that many filters are requested so that the result can be split
      into the four gate pre-activations.
    parameters: `(X_weight, h_weight, bias)` triple.
    memory: `(previous_h, previous_c)` pair. The integer `0` as `previous_h`
      means "no hidden state yet" and skips the recurrent convolution.

  Returns:
    The updated memory as a `(next_h, next_c)` tuple.
  """
  n_filters = settings['n_filters'] * 4
  X_weight, h_weight, bias = parameters
  previous_h, previous_c = memory

  array = _lstm_convolution(X, n_filters, X_weight)
  # The original `previous_h is 0` relied on CPython's small-int caching and
  # emits a SyntaxWarning on Python 3.8+. The isinstance guard comes first so
  # `==` is never applied to a non-int object that may overload it.
  has_hidden_state = not (isinstance(previous_h, int) and previous_h == 0)
  if has_hidden_state:
    array = array + _lstm_convolution(previous_h, n_filters, h_weight)
  array = layers.broadcast_plus(array, bias)

  # Split along axis 1 into the four gate pre-activations.
  group = layers.slice(X=array, axis=1, n_outputs=4)
  i = layers.sigmoid(group[0])
  f = layers.sigmoid(group[1])
  o = layers.sigmoid(group[2])
  # The candidate values use tanh, as in the standard LSTM formulation;
  # the previous code applied sigmoid here.
  g = layers.tanh(group[3])

  next_c = f * previous_c + i * g
  next_h = o * layers.tanh(next_c)
  return next_h, next_c
Exemplo n.º 2
0
def lstm(X, n_filters, cache):
    """Run one step of a convolutional LSTM, keeping all state in `cache`.

    Args:
        X: input for the current step; passed to `_rnn_convolution`.
        n_filters: number of filters per gate. Four times as many are
            requested so the result can be split into four gates.
        cache: mutable mapping holding the weights ('WX', 'WH', 'bias') and
            the recurrent state ('h', 'c'). Missing weights are inserted;
            'h' and 'c' are overwritten on every call. When 'h' or 'c' is
            absent, that term is replaced by 0.

    Returns:
        The same `cache` object, updated in place.
    """
    n_gate_filters = n_filters * 4

    # Note: dict.setdefault evaluates its default eagerly, so
    # layers.variable(...) is called on every step even when the key exists.
    WX = cache.setdefault('WX', layers.variable('X_weight'))
    WH = cache.setdefault('WH', layers.variable('H_weight'))
    bias = cache.setdefault(
        'bias', layers.variable('lstm_bias', shape=(1, n_gate_filters, 1, 1)))

    # The input term must use the input weight WX. The previous code passed
    # WH here, leaving WX unused and tying both terms to one weight.
    input_term = _rnn_convolution(X, n_gate_filters, WX)
    recurrent_term = (
        _rnn_convolution(cache['h'], n_gate_filters, WH) if 'h' in cache else 0)
    network = input_term + recurrent_term
    network = layers.broadcast_plus(network, bias)

    # Split along axis 1 into the four gate pre-activations.
    group = layers.slice(X=network, axis=1, n_outputs=4)
    i = layers.sigmoid(group[0])
    f = layers.sigmoid(group[1])
    o = layers.sigmoid(group[2])
    g = layers.tanh(group[3])

    cache['c'] = f * cache.get('c', 0) + i * g
    cache['h'] = o * layers.tanh(cache['c'])

    return cache
Exemplo n.º 3
0
def lstm(X, D, cache):
  """Advance an LSTM by one step, storing weights and state in `cache`.

  Args:
    X: input for the current step; passed to `_rnn_linearity`.
    D: size of each gate. `D * 4` outputs are requested so the result can be
      split into four gates.
    cache: mutable mapping. 'time' counts calls (0 on the first call);
      'WX', 'WH' and 'bias' are inserted when missing; 'h' and 'c' are
      overwritten on every call. When 'h' or 'c' is absent, that term is
      replaced by 0.

  Returns:
    The same `cache` object, updated in place.
  """
  # Step counter: starts at -1 when absent, so the first call records 0.
  cache['time'] = cache.get('time', -1) + 1

  gate_width = D * 4
  input_weight = cache.setdefault('WX', layers.variable('X_weight'))
  hidden_weight = cache.setdefault('WH', layers.variable('H_weight'))
  gate_bias = cache.setdefault(
    'bias', layers.variable('lstm_bias', shape=(1, gate_width)))

  input_term = _rnn_linearity(X, gate_width, input_weight)
  if 'h' in cache:
    recurrent_term = _rnn_linearity(cache['h'], gate_width, hidden_weight)
  else:
    recurrent_term = 0
  pre_activation = layers.broadcast_plus(input_term + recurrent_term, gate_bias)

  # Split along axis 1 into the four gate pre-activations.
  gates = layers.slice(X=pre_activation, axis=1, n_outputs=4)
  input_gate = layers.sigmoid(gates[0])
  forget_gate = layers.sigmoid(gates[1])
  output_gate = layers.sigmoid(gates[2])
  candidate = layers.tanh(gates[3])

  previous_cell = cache.get('c', 0)
  cache['c'] = forget_gate * previous_cell + input_gate * candidate
  cache['h'] = output_gate * layers.tanh(cache['c'])

  return cache